You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by sh...@apache.org on 2014/07/10 08:57:30 UTC
[1/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Repository: incubator-falcon
Updated Branches:
refs/heads/master 57953f77b -> 185b58885
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/filesystem/replication-feed.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/filesystem/replication-feed.xml b/src/main/examples/entity/filesystem/replication-feed.xml
new file mode 100644
index 0000000..ccf5cca
--- /dev/null
+++ b/src/main/examples/entity/filesystem/replication-feed.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="replication feed" name="repl-feed" xmlns="uri:falcon:feed:0.1">
+ <groups>input</groups>
+
+ <frequency>minutes(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(1)"/>
+
+ <clusters>
+ <cluster name="local" type="source">
+ <validity start="2013-01-01T00:00Z" end="2030-01-01T00:00Z"/>
+ <retention limit="hours(2)" action="delete"/>
+ </cluster>
+ <cluster name="local-target" type="target">
+ <validity start="2013-11-15T00:00Z" end="2030-01-01T00:00Z"/>
+ <retention limit="hours(2)" action="delete"/>
+ <locations>
+ <location type="data" path="/data/repl-in/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
+ </locations>
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/data/in/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x644"/>
+ <schema location="/schema/log/log.format.csv" provider="csv"/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/filesystem/standalone-cluster.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/filesystem/standalone-cluster.xml b/src/main/examples/entity/filesystem/standalone-cluster.xml
index 3c3c9f1..6fe4df3 100644
--- a/src/main/examples/entity/filesystem/standalone-cluster.xml
+++ b/src/main/examples/entity/filesystem/standalone-cluster.xml
@@ -23,7 +23,7 @@
-->
<cluster colo="local" description="Standalone cluster" name="local" xmlns="uri:falcon:cluster:0.1">
<interfaces>
- <interface type="readonly" endpoint="hftp://localhost:50010" version="1.1.2"/>
+ <interface type="readonly" endpoint="hdfs://localhost:8020" version="1.1.2"/>
<interface type="write" endpoint="hdfs://localhost:8020" version="1.1.2"/>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/filesystem/standalone-target-cluster.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/filesystem/standalone-target-cluster.xml b/src/main/examples/entity/filesystem/standalone-target-cluster.xml
new file mode 100644
index 0000000..442449d
--- /dev/null
+++ b/src/main/examples/entity/filesystem/standalone-target-cluster.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<!--
+ Example cluster definition that can be used with falcon and standalone
+ hadoop and oozie instances
+-->
+<cluster colo="local" description="Standalone cluster" name="local-target" xmlns="uri:falcon:cluster:0.1">
+ <interfaces>
+ <interface type="readonly" endpoint="hdfs://localhost:8020" version="1.1.2"/>
+
+ <interface type="write" endpoint="hdfs://localhost:8020" version="1.1.2"/>
+
+ <interface type="execute" endpoint="localhost:8021" version="1.1.2"/>
+
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/" version="4.0.0"/>
+
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true" version="5.4.3"/>
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/falcon/staging-target"/>
+ <location name="temp" path="/projects/falcon/tmp-target"/>
+ <location name="working" path="/projects/falcon/working-target"/>
+ </locations>
+ <properties>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hcat-in-feed.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hcat-in-feed.xml b/src/main/examples/entity/hcat/hcat-in-feed.xml
index 77f70db..5b2c998 100644
--- a/src/main/examples/entity/hcat/hcat-in-feed.xml
+++ b/src/main/examples/entity/hcat/hcat-in-feed.xml
@@ -16,7 +16,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<feed description="input" name="in" xmlns="uri:falcon:feed:0.1">
+<feed description="input" name="hcat-in" xmlns="uri:falcon:feed:0.1">
<groups>input</groups>
<frequency>minutes(1)</frequency>
@@ -24,7 +24,7 @@
<late-arrival cut-off="hours(1)"/>
<clusters>
- <cluster name="local">
+ <cluster name="hcat-local">
<validity start="2013-01-01T00:00Z" end="2030-01-01T00:00Z"/>
<retention limit="hours(2)" action="delete"/>
</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hcat-out-feed.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hcat-out-feed.xml b/src/main/examples/entity/hcat/hcat-out-feed.xml
index f09b2ed..26d1f18 100644
--- a/src/main/examples/entity/hcat/hcat-out-feed.xml
+++ b/src/main/examples/entity/hcat/hcat-out-feed.xml
@@ -16,7 +16,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<feed description="output" name="out" xmlns="uri:falcon:feed:0.1">
+<feed description="output" name="hcat-out" xmlns="uri:falcon:feed:0.1">
<groups>output</groups>
<frequency>minutes(5)</frequency>
@@ -24,7 +24,7 @@
<late-arrival cut-off="hours(1)"/>
<clusters>
- <cluster name="local">
+ <cluster name="hcat-local">
<validity start="2013-01-01T00:00Z" end="2030-01-01T00:00Z"/>
<retention limit="hours(2)" action="delete"/>
</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hcat-pig-process.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hcat-pig-process.xml b/src/main/examples/entity/hcat/hcat-pig-process.xml
index 753e5b0..6ee17eb 100644
--- a/src/main/examples/entity/hcat/hcat-pig-process.xml
+++ b/src/main/examples/entity/hcat/hcat-pig-process.xml
@@ -19,7 +19,7 @@
<process name="hcat-pig-process" xmlns="uri:falcon:process:0.1">
<clusters>
- <cluster name="local">
+ <cluster name="hcat-local">
<validity start="2013-11-15T00:05Z" end="2013-11-15T01:05Z"/>
</cluster>
</clusters>
@@ -30,13 +30,13 @@
<timezone>UTC</timezone>
<inputs>
- <!-- In the pig script, the input paths will be available in a variable 'inpaths' -->
- <input name="input" feed="in" start="now(0,-5)" end="now(0,-1)"/>
+ <!-- In the pig script, the input paths will be available in a variable 'inparts' -->
+ <input name="inparts" feed="in" start="now(0,-5)" end="now(0,-1)"/>
</inputs>
<outputs>
- <!-- In the pig script, the output path will be available in a variable 'outpath' -->
- <output name="output" feed="out" instance="now(0,0)"/>
+ <!-- In the pig script, the output path will be available in a variable 'outpart' -->
+ <output name="outpart" feed="out" instance="now(0,0)"/>
</outputs>
<workflow engine="pig" path="/app/pig/hcat-wordcount.pig"/>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hcat-replication-feed.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hcat-replication-feed.xml b/src/main/examples/entity/hcat/hcat-replication-feed.xml
new file mode 100644
index 0000000..2b43d28
--- /dev/null
+++ b/src/main/examples/entity/hcat/hcat-replication-feed.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="input" name="hcat-repl-feed" xmlns="uri:falcon:feed:0.1">
+ <groups>input</groups>
+
+ <frequency>minutes(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(1)"/>
+
+ <clusters>
+ <cluster name="hcat-local" type="source">
+ <validity start="2013-01-01T00:00Z" end="2030-01-01T00:00Z"/>
+ <retention limit="hours(2)" action="delete"/>
+ </cluster>
+ <cluster name="hcat-local-target" type="target">
+ <validity start="2013-11-15T00:00Z" end="2030-01-01T00:00Z"/>
+ <retention limit="hours(2)" action="delete"/>
+ <table uri="catalog:default:repl_in_table#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}-${MINUTE}" />
+ </cluster>
+ </clusters>
+
+ <table uri="catalog:default:in_table#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}-${MINUTE}" />
+
+ <ACL owner="testuser" group="group" permission="0x644"/>
+ <schema location="/schema/log/log.format.csv" provider="csv"/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hcat-standalone-cluster.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hcat-standalone-cluster.xml b/src/main/examples/entity/hcat/hcat-standalone-cluster.xml
index 30f0cb1..7962b1f 100644
--- a/src/main/examples/entity/hcat/hcat-standalone-cluster.xml
+++ b/src/main/examples/entity/hcat/hcat-standalone-cluster.xml
@@ -21,7 +21,7 @@
Example cluster definition that can be used with falcon and standalone
hadoop and oozie instances
-->
-<cluster colo="local" description="Standalone cluster with hacatalog" name="local" xmlns="uri:falcon:cluster:0.1">
+<cluster colo="local" description="Standalone cluster with hcatalog" name="hcat-local" xmlns="uri:falcon:cluster:0.1">
<interfaces>
<interface type="readonly" endpoint="hftp://localhost:50010" version="1.1.2"/>
@@ -36,9 +36,9 @@
<interface type="registry" endpoint="thrift://localhost:12000" version="0.11.0"/>
</interfaces>
<locations>
- <location name="staging" path="/projects/falcon/staging"/>
- <location name="temp" path="/projects/falcon/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
+ <location name="staging" path="/projects/falcon/hcat-staging"/>
+ <location name="temp" path="/projects/falcon/hcat-tmp"/>
+ <location name="working" path="/projects/falcon/hcat-working"/>
</locations>
<properties>
</properties>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hcat-standalone-target-cluster.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hcat-standalone-target-cluster.xml b/src/main/examples/entity/hcat/hcat-standalone-target-cluster.xml
new file mode 100644
index 0000000..8de78cf
--- /dev/null
+++ b/src/main/examples/entity/hcat/hcat-standalone-target-cluster.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<!--
+ Example cluster definition that can be used with falcon and standalone
+ hadoop and oozie instances
+-->
+<cluster colo="hcat-local-target" description="Standalone cluster with hcatalog" name="hcat-local-target" xmlns="uri:falcon:cluster:0.1">
+ <interfaces>
+ <interface type="readonly" endpoint="hftp://localhost:50010" version="1.1.2"/>
+
+ <interface type="write" endpoint="hdfs://localhost:8020" version="1.1.2"/>
+
+ <interface type="execute" endpoint="localhost:8021" version="1.1.2"/>
+
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/" version="4.0.0"/>
+
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true" version="5.4.3"/>
+
+ <interface type="registry" endpoint="thrift://localhost:12000" version="0.11.0"/>
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/falcon/hcat-staging-target"/>
+ <location name="temp" path="/projects/falcon/hcat-tmp-target"/>
+ <location name="working" path="/projects/falcon/hcat-working-target"/>
+ </locations>
+ <properties>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/hcat/hive-process.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/hcat/hive-process.xml b/src/main/examples/entity/hcat/hive-process.xml
index 0f3c540..ad8b219 100644
--- a/src/main/examples/entity/hcat/hive-process.xml
+++ b/src/main/examples/entity/hcat/hive-process.xml
@@ -19,7 +19,7 @@
<process name="hive-process" xmlns="uri:falcon:process:0.1">
<clusters>
- <cluster name="local">
+ <cluster name="hcat-local">
<validity start="2013-11-15T00:05Z" end="2013-11-15T01:05Z"/>
</cluster>
</clusters>
@@ -30,13 +30,13 @@
<timezone>UTC</timezone>
<inputs>
- <!-- In the pig script, the input paths will be available in a variable 'inpaths' -->
- <input name="input" feed="in" start="now(0,-5)" end="now(0,-1)"/>
+ <!-- In the hive script, the input paths will be available in a variable 'inparts' -->
+ <input name="inparts" feed="hcat-in" start="now(0,-5)" end="now(0,-1)"/>
</inputs>
<outputs>
- <!-- In the pig script, the output path will be available in a variable 'outpath' -->
- <output name="output" feed="out" instance="now(0,0)"/>
+ <!-- In the hive script, the output path will be available in a variable 'outpart' -->
+ <output name="outpart" feed="hcat-out" instance="now(0,0)"/>
</outputs>
<workflow engine="hive" path="/app/hive/wordcount.hql"/>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/webapp/pom.xml
----------------------------------------------------------------------
diff --git a/webapp/pom.xml b/webapp/pom.xml
index d1e1ec1..9a5050e 100644
--- a/webapp/pom.xml
+++ b/webapp/pom.xml
@@ -404,6 +404,7 @@
<artifactId>maven-jetty-plugin</artifactId>
<version>${jetty.version}</version>
<configuration>
+ <skip>${skipTests}</skip>
<connectors>
<connector implementation="org.mortbay.jetty.security.SslSocketConnector">
<port>41443</port>
[5/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/resources/workflow/replication-workflow.xml
----------------------------------------------------------------------
diff --git a/oozie/src/main/resources/workflow/replication-workflow.xml b/oozie/src/main/resources/workflow/replication-workflow.xml
new file mode 100644
index 0000000..0748acf
--- /dev/null
+++ b/oozie/src/main/resources/workflow/replication-workflow.xml
@@ -0,0 +1,330 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-feed-parent-workflow'>
+ <start to='should-record'/>
+ <decision name='should-record'>
+ <switch>
+ <case to="recordsize">
+ ${shouldRecord=="true"}
+ </case>
+ <default to="replication-decision"/>
+ </switch>
+ </decision>
+ <action name='recordsize'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <!-- HCatalog jars -->
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>hcatalog</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.latedata.LateDataHandler</main-class>
+ <arg>-out</arg>
+ <arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg>
+ <arg>-paths</arg>
+ <arg>${falconInPaths}</arg>
+ <arg>-falconInputFeeds</arg>
+ <arg>${falconInputFeeds}</arg>
+ <arg>-falconInputFeedStorageTypes</arg>
+ <arg>${falconInputFeedStorageTypes}</arg>
+ <capture-output/>
+ </java>
+ <ok to="replication-decision"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <decision name="replication-decision">
+ <switch>
+ <case to="table-export">
+ ${falconFeedStorageType == "TABLE"}
+ </case>
+ <default to="replication"/>
+ </switch>
+ </decision>
+ <!-- Table Replication - Export data and metadata to HDFS Staging from Source Hive -->
+ <action name="table-export">
+ <hive xmlns="uri:oozie:hive-action:0.2">
+ <job-tracker>${falconSourceJobTracker}</job-tracker>
+ <name-node>${falconSourceNameNode}</name-node>
+ <prepare>
+ <delete path="${distcpSourcePaths}"/>
+ </prepare>
+ <job-xml>${wf:appPath()}/conf/falcon-source-hive-site.xml</job-xml>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <script>${wf:appPath()}/scripts/falcon-table-export.hql</script>
+ <param>falconSourceDatabase=${falconSourceDatabase}</param>
+ <param>falconSourceTable=${falconSourceTable}</param>
+ <param>falconSourcePartition=${falconSourcePartition}</param>
+ <param>falconSourceStagingDir=${distcpSourcePaths}</param>
+ </hive>
+ <ok to="replication"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <!-- Replication action -->
+ <action name="replication">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property> <!-- hadoop 1 parameter -->
+ <name>oozie.launcher.mapreduce.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.replication.FeedReplicator</main-class>
+ <arg>-Dfalcon.include.path=${sourceRelativePaths}</arg>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-maxMaps</arg>
+ <arg>${maxMaps}</arg>
+ <arg>-mapBandwidthKB</arg>
+ <arg>${mapBandwidthKB}</arg>
+ <arg>-sourcePaths</arg>
+ <arg>${distcpSourcePaths}</arg>
+ <arg>-targetPath</arg>
+ <arg>${distcpTargetPaths}</arg>
+ <arg>-falconFeedStorageType</arg>
+ <arg>${falconFeedStorageType}</arg>
+ <file>${wf:conf("falcon.libpath")}/hadoop-distcp.jar</file>
+ </java>
+ <ok to="post-replication-decision"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <decision name="post-replication-decision">
+ <switch>
+ <case to="table-import">
+ ${falconFeedStorageType == "TABLE"}
+ </case>
+ <default to="succeeded-post-processing"/>
+ </switch>
+ </decision>
+ <!-- Table Replication - Import data and metadata from HDFS Staging into Target Hive -->
+ <action name="table-import">
+ <hive xmlns="uri:oozie:hive-action:0.2">
+ <job-tracker>${falconTargetJobTracker}</job-tracker>
+ <name-node>${falconTargetNameNode}</name-node>
+ <job-xml>${wf:appPath()}/conf/falcon-target-hive-site.xml</job-xml>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <script>${wf:appPath()}/scripts/falcon-table-import.hql</script>
+ <param>falconTargetDatabase=${falconTargetDatabase}</param>
+ <param>falconTargetTable=${falconTargetTable}</param>
+ <param>falconTargetPartition=${falconTargetPartition}</param>
+ <param>falconTargetStagingDir=${distcpTargetPaths}</param>
+ </hive>
+ <ok to="cleanup-table-staging-dir"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <action name="cleanup-table-staging-dir">
+ <fs>
+ <delete path="${distcpSourcePaths}"/>
+ <delete path="${distcpTargetPaths}"/>
+ </fs>
+ <ok to="succeeded-post-processing"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <action name='succeeded-post-processing'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
+ <arg>-cluster</arg>
+ <arg>${cluster}</arg>
+ <arg>-entityType</arg>
+ <arg>${entityType}</arg>
+ <arg>-entityName</arg>
+ <arg>${entityName}</arg>
+ <arg>-nominalTime</arg>
+ <arg>${nominalTime}</arg>
+ <arg>-operation</arg>
+ <arg>REPLICATE</arg>
+ <arg>-workflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-runId</arg>
+ <arg>${wf:run()}</arg>
+ <arg>-status</arg>
+ <arg>SUCCEEDED</arg>
+ <arg>-timeStamp</arg>
+ <arg>${timeStamp}</arg>
+ <arg>-brokerImplClass</arg>
+ <arg>${wf:conf("broker.impl.class")}</arg>
+ <arg>-brokerUrl</arg>
+ <arg>${wf:conf("broker.url")}</arg>
+ <arg>-userBrokerImplClass</arg>
+ <arg>${userBrokerImplClass}</arg>
+ <arg>-userBrokerUrl</arg>
+ <arg>${userBrokerUrl}</arg>
+ <arg>-brokerTTL</arg>
+ <arg>${wf:conf("broker.ttlInMins")}</arg>
+ <arg>-feedNames</arg>
+ <arg>${feedNames}</arg>
+ <arg>-feedInstancePaths</arg>
+ <arg>${feedInstancePaths}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}-${srcClusterName}.csv</arg>
+ <arg>-workflowEngineUrl</arg>
+ <arg>${workflowEngineUrl}</arg>
+ <arg>-userWorkflowName</arg>
+ <arg>${userWorkflowName}</arg>
+ <arg>-userWorkflowVersion</arg>
+ <arg>${userWorkflowVersion}</arg>
+ <arg>-userWorkflowEngine</arg>
+ <arg>${userWorkflowEngine}</arg>
+ <arg>-subflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-logDir</arg>
+ <arg>${logDir}/job-${nominalTime}/${srcClusterName}/</arg>
+ <arg>-workflowUser</arg>
+ <arg>${wf:user()}</arg>
+ <arg>-falconInputFeeds</arg>
+ <arg>${falconInputFeeds}</arg>
+ <arg>-falconInputPaths</arg>
+ <arg>${falconInPaths}</arg>
+ <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
+ <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
+ <file>${wf:conf("falcon.libpath")}/jms.jar</file>
+ <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
+ <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
+ <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
+ </java>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <action name='failed-post-processing'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
+ <arg>-cluster</arg>
+ <arg>${cluster}</arg>
+ <arg>-entityType</arg>
+ <arg>${entityType}</arg>
+ <arg>-entityName</arg>
+ <arg>${entityName}</arg>
+ <arg>-nominalTime</arg>
+ <arg>${nominalTime}</arg>
+ <arg>-operation</arg>
+ <arg>REPLICATE</arg>
+ <arg>-workflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-runId</arg>
+ <arg>${wf:run()}</arg>
+ <arg>-status</arg>
+ <arg>FAILED</arg>
+ <arg>-timeStamp</arg>
+ <arg>${timeStamp}</arg>
+ <arg>-brokerImplClass</arg>
+ <arg>${wf:conf("broker.impl.class")}</arg>
+ <arg>-brokerUrl</arg>
+ <arg>${wf:conf("broker.url")}</arg>
+ <arg>-userBrokerImplClass</arg>
+ <arg>${userBrokerImplClass}</arg>
+ <arg>-userBrokerUrl</arg>
+ <arg>${userBrokerUrl}</arg>
+ <arg>-brokerTTL</arg>
+ <arg>${wf:conf("broker.ttlInMins")}</arg>
+ <arg>-feedNames</arg>
+ <arg>${feedNames}</arg>
+ <arg>-feedInstancePaths</arg>
+ <arg>${feedInstancePaths}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}-${srcClusterName}.csv</arg>
+ <arg>-workflowEngineUrl</arg>
+ <arg>${workflowEngineUrl}</arg>
+ <arg>-subflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-logDir</arg>
+ <arg>${logDir}/job-${nominalTime}/${srcClusterName}/</arg>
+ <arg>-workflowUser</arg>
+ <arg>${wf:user()}</arg>
+ <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
+ <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
+ <file>${wf:conf("falcon.libpath")}/jms.jar</file>
+ <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
+ <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
+ <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
+ </java>
+ <ok to="fail"/>
+ <error to="fail"/>
+ </action>
+ <kill name="fail">
+ <message>
+ Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+ </message>
+ </kill>
+ <end name='end'/>
+</workflow-app>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/resources/workflow/retention-workflow.xml
----------------------------------------------------------------------
diff --git a/oozie/src/main/resources/workflow/retention-workflow.xml b/oozie/src/main/resources/workflow/retention-workflow.xml
new file mode 100644
index 0000000..5138865
--- /dev/null
+++ b/oozie/src/main/resources/workflow/retention-workflow.xml
@@ -0,0 +1,208 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-feed-parent-workflow'>
+ <!-- Feed retention parent workflow: run FeedEvictor, then FalconPostProcessing on success or failure. -->
+ <start to='eviction'/>
+ <!-- Deletes feed instances beyond the retention limit via org.apache.falcon.retention.FeedEvictor. -->
+ <action name="eviction">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <!-- HCatalog jars -->
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>hcatalog</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.retention.FeedEvictor</main-class>
+ <arg>-feedBasePath</arg>
+ <arg>${feedDataPath}</arg>
+ <arg>-falconFeedStorageType</arg>
+ <arg>${falconFeedStorageType}</arg>
+ <arg>-retentionType</arg>
+ <arg>instance</arg>
+ <arg>-retentionLimit</arg>
+ <arg>${limit}</arg>
+ <arg>-timeZone</arg>
+ <arg>${timeZone}</arg>
+ <arg>-frequency</arg>
+ <arg>${frequency}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
+ </java>
+ <ok to="succeeded-post-processing"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <!-- Post-processing after successful eviction: reports status SUCCEEDED and ships
+ the broker/JMS connection args below to FalconPostProcessing. -->
+ <action name='succeeded-post-processing'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
+ <arg>-cluster</arg>
+ <arg>${cluster}</arg>
+ <arg>-entityType</arg>
+ <arg>${entityType}</arg>
+ <arg>-entityName</arg>
+ <arg>${entityName}</arg>
+ <arg>-nominalTime</arg>
+ <arg>${nominalTime}</arg>
+ <arg>-operation</arg>
+ <arg>DELETE</arg>
+ <arg>-workflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-runId</arg>
+ <arg>${wf:run()}</arg>
+ <arg>-status</arg>
+ <arg>SUCCEEDED</arg>
+ <arg>-timeStamp</arg>
+ <arg>${timeStamp}</arg>
+ <arg>-brokerImplClass</arg>
+ <arg>${wf:conf("broker.impl.class")}</arg>
+ <arg>-brokerUrl</arg>
+ <arg>${wf:conf("broker.url")}</arg>
+ <arg>-userBrokerImplClass</arg>
+ <arg>${userBrokerImplClass}</arg>
+ <arg>-userBrokerUrl</arg>
+ <arg>${userBrokerUrl}</arg>
+ <arg>-brokerTTL</arg>
+ <arg>${wf:conf("broker.ttlInMins")}</arg>
+ <arg>-feedNames</arg>
+ <arg>${feedNames}</arg>
+ <arg>-feedInstancePaths</arg>
+ <arg>${feedInstancePaths}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
+ <arg>-workflowEngineUrl</arg>
+ <arg>${workflowEngineUrl}</arg>
+ <arg>-subflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-logDir</arg>
+ <arg>${logDir}/job-${nominalTime}/</arg>
+ <arg>-workflowUser</arg>
+ <arg>${wf:user()}</arg>
+ <arg>-userWorkflowName</arg>
+ <arg>${userWorkflowName}</arg>
+ <arg>-userWorkflowVersion</arg>
+ <arg>${userWorkflowVersion}</arg>
+ <arg>-userWorkflowEngine</arg>
+ <arg>${userWorkflowEngine}</arg>
+ <arg>-falconInputFeeds</arg>
+ <arg>${falconInputFeeds}</arg>
+ <arg>-falconInputPaths</arg>
+ <arg>${falconInPaths}</arg>
+ <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
+ <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
+ <file>${wf:conf("falcon.libpath")}/jms.jar</file>
+ <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
+ <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
+ <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
+ </java>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <!-- Post-processing after failed eviction: reports status FAILED, then the workflow is killed. -->
+ <action name='failed-post-processing'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
+ <arg>-cluster</arg>
+ <arg>${cluster}</arg>
+ <arg>-entityType</arg>
+ <arg>${entityType}</arg>
+ <arg>-entityName</arg>
+ <arg>${entityName}</arg>
+ <arg>-nominalTime</arg>
+ <arg>${nominalTime}</arg>
+ <arg>-operation</arg>
+ <arg>DELETE</arg>
+ <arg>-workflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-runId</arg>
+ <arg>${wf:run()}</arg>
+ <arg>-status</arg>
+ <arg>FAILED</arg>
+ <arg>-timeStamp</arg>
+ <arg>${timeStamp}</arg>
+ <arg>-brokerImplClass</arg>
+ <arg>${wf:conf("broker.impl.class")}</arg>
+ <arg>-brokerUrl</arg>
+ <arg>${wf:conf("broker.url")}</arg>
+ <arg>-userBrokerImplClass</arg>
+ <arg>${userBrokerImplClass}</arg>
+ <arg>-userBrokerUrl</arg>
+ <arg>${userBrokerUrl}</arg>
+ <arg>-brokerTTL</arg>
+ <arg>${wf:conf("broker.ttlInMins")}</arg>
+ <arg>-feedNames</arg>
+ <arg>${feedNames}</arg>
+ <arg>-feedInstancePaths</arg>
+ <arg>${feedInstancePaths}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
+ <arg>-workflowEngineUrl</arg>
+ <arg>${workflowEngineUrl}</arg>
+ <arg>-subflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-logDir</arg>
+ <arg>${logDir}/job-${nominalTime}/</arg>
+ <arg>-workflowUser</arg>
+ <arg>${wf:user()}</arg>
+ <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
+ <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
+ <file>${wf:conf("falcon.libpath")}/jms.jar</file>
+ <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
+ <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
+ <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
+ </java>
+ <!-- NOTE(review): the "ok" transition also routes to "fail" so the workflow
+ ends killed even when failure post-processing succeeds; confirm intentional. -->
+ <ok to="fail"/>
+ <error to="fail"/>
+ </action>
+ <!-- Terminal failure node: surfaces the last error message. -->
+ <kill name="fail">
+ <message>
+ Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+ </message>
+ </kill>
+ <end name='end'/>
+</workflow-app>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/java/org/apache/falcon/oozie/feed/OozieFeedWorkflowBuilderTest.java
----------------------------------------------------------------------
diff --git a/oozie/src/test/java/org/apache/falcon/oozie/feed/OozieFeedWorkflowBuilderTest.java b/oozie/src/test/java/org/apache/falcon/oozie/feed/OozieFeedWorkflowBuilderTest.java
new file mode 100644
index 0000000..542634d
--- /dev/null
+++ b/oozie/src/test/java/org/apache/falcon/oozie/feed/OozieFeedWorkflowBuilderTest.java
@@ -0,0 +1,673 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.falcon.oozie.feed;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.cluster.util.EmbeddedCluster;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.SchemaHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.cluster.Interfacetype;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.oozie.OozieCoordinatorBuilder;
+import org.apache.falcon.oozie.OozieEntityBuilder;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.bundle.BUNDLEAPP;
+import org.apache.falcon.oozie.bundle.COORDINATOR;
+import org.apache.falcon.oozie.coordinator.CONFIGURATION.Property;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.falcon.oozie.coordinator.SYNCDATASET;
+import org.apache.falcon.oozie.process.AbstractTestBase;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.falcon.oozie.workflow.DECISION;
+import org.apache.falcon.oozie.workflow.JAVA;
+import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
+import org.apache.falcon.security.CurrentUser;
+import org.apache.falcon.security.SecurityUtil;
+import org.apache.falcon.util.StartupProperties;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * Tests for Oozie workflow definition for feed replication & retention.
+ */
+public class OozieFeedWorkflowBuilderTest extends AbstractTestBase {
+ private EmbeddedCluster srcMiniDFS;
+ private EmbeddedCluster trgMiniDFS;
+ private final ConfigurationStore store = ConfigurationStore.get();
+ private Cluster srcCluster;
+ private Cluster trgCluster;
+ private Cluster alphaTrgCluster;
+ private Cluster betaTrgCluster;
+ private Feed feed;
+ private Feed tableFeed;
+ private Feed fsReplFeed;
+
+ private static final String SRC_CLUSTER_PATH = "/feed/src-cluster.xml";
+ private static final String TRG_CLUSTER_PATH = "/feed/trg-cluster.xml";
+ private static final String FEED = "/feed/feed.xml";
+ private static final String TABLE_FEED = "/feed/table-replication-feed.xml";
+ private static final String FS_REPLICATION_FEED = "/feed/fs-replication-feed.xml";
+
+ /**
+ * Starts two embedded HDFS clusters (source "cluster1", target "cluster2"),
+ * clears the configuration store, then registers the cluster entities
+ * (with a metastore-kerberos-principal property added) and the three feeds
+ * used by the tests.
+ */
+ @BeforeClass
+ public void setUpDFS() throws Exception {
+ CurrentUser.authenticate("falcon");
+
+ srcMiniDFS = EmbeddedCluster.newCluster("cluster1");
+ String srcHdfsUrl = srcMiniDFS.getConf().get("fs.default.name");
+
+ trgMiniDFS = EmbeddedCluster.newCluster("cluster2");
+ String trgHdfsUrl = trgMiniDFS.getConf().get("fs.default.name");
+
+ cleanupStore();
+
+ // Shared cluster property required by secure (kerberos) test runs.
+ org.apache.falcon.entity.v0.cluster.Property property =
+ new org.apache.falcon.entity.v0.cluster.Property();
+ property.setName(OozieOrchestrationWorkflowBuilder.METASTORE_KERBEROS_PRINCIPAL);
+ property.setValue("hive/_HOST");
+
+ srcCluster = (Cluster) storeEntity(EntityType.CLUSTER, SRC_CLUSTER_PATH, srcHdfsUrl);
+ srcCluster.getProperties().getProperties().add(property);
+
+ trgCluster = (Cluster) storeEntity(EntityType.CLUSTER, TRG_CLUSTER_PATH, trgHdfsUrl);
+ trgCluster.getProperties().getProperties().add(property);
+
+ alphaTrgCluster = (Cluster) storeEntity(EntityType.CLUSTER, "/feed/trg-cluster-alpha.xml", trgHdfsUrl);
+ betaTrgCluster = (Cluster) storeEntity(EntityType.CLUSTER, "/feed/trg-cluster-beta.xml", trgHdfsUrl);
+
+ feed = (Feed) storeEntity(EntityType.FEED, FEED);
+ fsReplFeed = (Feed) storeEntity(EntityType.FEED, FS_REPLICATION_FEED);
+ tableFeed = (Feed) storeEntity(EntityType.FEED, TABLE_FEED);
+ }
+
+ /** Convenience overload: stores an entity from a classpath resource with no write URL. */
+ private Entity storeEntity(EntityType type, String resource) throws Exception {
+ return storeEntity(type, null, resource, null);
+ }
+
+ /** Convenience overload: stores an entity from a classpath resource, rewriting its write URL. */
+ private Entity storeEntity(EntityType type, String resource, String writeUrl) throws Exception {
+ return storeEntity(type, null, resource, writeUrl);
+ }
+
+ /** Removes every entity of every type from the configuration store. */
+ protected void cleanupStore() throws FalconException {
+ for (EntityType type : EntityType.values()) {
+ Collection<String> entities = store.getEntities(type);
+ for (String entity : entities) {
+ store.remove(type, entity);
+ }
+ }
+ }
+
+ /** Shuts down both embedded HDFS clusters after all tests have run. */
+ @AfterClass
+ public void stopDFS() {
+ srcMiniDFS.shutdown();
+ trgMiniDFS.shutdown();
+ }
+
+ /**
+ * Builds the Oozie bundle for the filesystem-storage feed on the target
+ * cluster and verifies both generated coordinators: the retention coord's
+ * lib extensions, and the replication coord's datasets, input/output
+ * events, and the workflow configuration properties consumed by the feed
+ * replicator, late-data handling, and post-processing.
+ */
+ @Test
+ public void testReplicationCoordsForFSStorage() throws Exception {
+ OozieEntityBuilder builder = OozieEntityBuilder.get(feed);
+ Path bundlePath = new Path("/projects/falcon/");
+ builder.build(trgCluster, bundlePath);
+ BUNDLEAPP bundle = getBundle(trgMiniDFS.getFileSystem(), bundlePath);
+ List<COORDINATOR> coords = bundle.getCoordinator();
+
+ //Assert retention coord
+ COORDINATORAPP coord = getCoordinator(trgMiniDFS, coords.get(0).getAppPath());
+ assertLibExtensions(coord, "retention");
+
+ //Assert replication coord
+ coord = getCoordinator(trgMiniDFS, coords.get(1).getAppPath());
+ Assert.assertEquals("2010-01-01T00:40Z", coord.getStart());
+ Assert.assertEquals(getWorkflowAppPath(), coord.getAction().getWorkflow().getAppPath());
+ Assert.assertEquals("FALCON_FEED_REPLICATION_" + feed.getName() + "_"
+ + srcCluster.getName(), coord.getName());
+ Assert.assertEquals("${coord:minutes(20)}", coord.getFrequency());
+ SYNCDATASET inputDataset = (SYNCDATASET) coord.getDatasets()
+ .getDatasetOrAsyncDataset().get(0);
+ SYNCDATASET outputDataset = (SYNCDATASET) coord.getDatasets()
+ .getDatasetOrAsyncDataset().get(1);
+
+ // Input dataset reads from the source cluster's read-only endpoint;
+ // output dataset writes to the target cluster's storage endpoint.
+ Assert.assertEquals("${coord:minutes(20)}", inputDataset.getFrequency());
+ Assert.assertEquals("input-dataset", inputDataset.getName());
+ Assert.assertEquals(
+ ClusterHelper.getReadOnlyStorageUrl(srcCluster)
+ + "/examples/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}",
+ inputDataset.getUriTemplate());
+
+ Assert.assertEquals("${coord:minutes(20)}",
+ outputDataset.getFrequency());
+ Assert.assertEquals("output-dataset", outputDataset.getName());
+ Assert.assertEquals(ClusterHelper.getStorageUrl(trgCluster)
+ + "/examples/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}",
+ outputDataset.getUriTemplate());
+ String inEventName =coord.getInputEvents().getDataIn().get(0).getName();
+ String inEventDataset =coord.getInputEvents().getDataIn().get(0).getDataset();
+ String inEventInstance = coord.getInputEvents().getDataIn().get(0).getInstance().get(0);
+ Assert.assertEquals("input", inEventName);
+ Assert.assertEquals("input-dataset", inEventDataset);
+ Assert.assertEquals("${now(0,-40)}", inEventInstance);
+
+ String outEventInstance = coord.getOutputEvents().getDataOut().get(0).getInstance();
+ Assert.assertEquals("${now(0,-40)}", outEventInstance);
+
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ // verify the replication param that feed replicator depends on
+ // NOTE(review): sourceRelativePaths is asserted against both
+ // pathsWithPartitions and the literal "${coord:dataIn('input')}"; these
+ // only agree when the feed has no partitions - confirm that is the intent.
+ String pathsWithPartitions = getPathsWithPartitions(srcCluster, trgCluster, feed);
+ Assert.assertEquals(props.get("sourceRelativePaths"), pathsWithPartitions);
+
+ Assert.assertEquals(props.get("sourceRelativePaths"), "${coord:dataIn('input')}");
+ Assert.assertEquals(props.get("distcpSourcePaths"), "${coord:dataIn('input')}");
+ Assert.assertEquals(props.get("distcpTargetPaths"), "${coord:dataOut('output')}");
+ Assert.assertEquals(props.get("falconFeedStorageType"), Storage.TYPE.FILESYSTEM.name());
+
+ // verify the late data params
+ Assert.assertEquals(props.get("falconInputFeeds"), feed.getName());
+ Assert.assertEquals(props.get("falconInPaths"), "${coord:dataIn('input')}");
+ Assert.assertEquals(props.get("falconInPaths"), pathsWithPartitions);
+ Assert.assertEquals(props.get("falconInputFeedStorageTypes"), Storage.TYPE.FILESYSTEM.name());
+ Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, feed));
+
+ // verify the post processing params
+ Assert.assertEquals(props.get("feedNames"), feed.getName());
+ Assert.assertEquals(props.get("feedInstancePaths"), "${coord:dataOut('output')}");
+
+ // verify workflow params
+ Assert.assertEquals(props.get("userWorkflowName"), "replication-policy");
+ Assert.assertEquals(props.get("userWorkflowVersion"), "0.5");
+ Assert.assertEquals(props.get("userWorkflowEngine"), "falcon");
+
+ // verify default params
+ Assert.assertEquals(props.get("queueName"), "default");
+ Assert.assertEquals(props.get("jobPriority"), "NORMAL");
+ Assert.assertEquals(props.get("maxMaps"), "5");
+ Assert.assertEquals(props.get("mapBandwidthKB"), "102400");
+
+ assertLibExtensions(coord, "replication");
+ WORKFLOWAPP wf = getWorkflowapp(coord);
+ assertWorkflowRetries(wf);
+
+ Assert.assertFalse(Storage.TYPE.TABLE == FeedHelper.getStorageType(feed, trgCluster));
+ }
+
+ /** Reads a coordinator app from the given cluster, stripping the ${nameNode} prefix from the path. */
+ private COORDINATORAPP getCoordinator(EmbeddedCluster cluster, String appPath) throws Exception {
+ return getCoordinator(cluster.getFileSystem(), new Path(StringUtils.removeStart(appPath, "${nameNode}")));
+ }
+
+ /** Expected replication workflow app path for the source cluster under the test bundle root. */
+ private String getWorkflowAppPath() {
+ return "${nameNode}/projects/falcon/REPLICATION/" + srcCluster.getName();
+ }
+
+ /** Convenience overload: checks retry settings on the workflow referenced by the coordinator. */
+ private void assertWorkflowRetries(COORDINATORAPP coord) throws JAXBException, IOException {
+ assertWorkflowRetries(getWorkflowapp(coord));
+ }
+
+ /** Asserts every Falcon-managed action in the workflow retries 3 times at a 1-unit interval. */
+ private void assertWorkflowRetries(WORKFLOWAPP wf) throws JAXBException, IOException {
+ List<Object> actions = wf.getDecisionOrForkOrJoin();
+ for (Object obj : actions) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+ ACTION action = (ACTION) obj;
+ String actionName = action.getName();
+ if (OozieOrchestrationWorkflowBuilder.FALCON_ACTIONS.contains(actionName)) {
+ Assert.assertEquals(action.getRetryMax(), "3");
+ Assert.assertEquals(action.getRetryInterval(), "1");
+ }
+ }
+ }
+
+ /**
+ * Asserts that every java/pig/map-reduce action in the coordinator's
+ * workflow lists the lifecycle-specific libext jar
+ * (.../libext/FEED/&lt;lifecycle&gt;/ext.jar) as its last file entry.
+ */
+ private void assertLibExtensions(COORDINATORAPP coord, String lifecycle) throws Exception {
+ WORKFLOWAPP wf = getWorkflowapp(coord);
+ List<Object> actions = wf.getDecisionOrForkOrJoin();
+ for (Object obj : actions) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+ ACTION action = (ACTION) obj;
+ List<String> files = null;
+ if (action.getJava() != null) {
+ files = action.getJava().getFile();
+ } else if (action.getPig() != null) {
+ files = action.getPig().getFile();
+ } else if (action.getMapReduce() != null) {
+ files = action.getMapReduce().getFile();
+ }
+ if (files != null) {
+ Assert.assertTrue(files.get(files.size() - 1).endsWith("/projects/falcon/working/libext/FEED/"
+ + lifecycle + "/ext.jar"));
+ }
+ }
+ }
+
+ /** Unmarshals workflow.xml under the coordinator's app path from the target mini-DFS. */
+ @SuppressWarnings("unchecked")
+ private WORKFLOWAPP getWorkflowapp(COORDINATORAPP coord) throws JAXBException, IOException {
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ JAXBContext jaxbContext = JAXBContext.newInstance(WORKFLOWAPP.class);
+ return ((JAXBElement<WORKFLOWAPP>) jaxbContext.createUnmarshaller().unmarshal(
+ trgMiniDFS.getFileSystem().open(new Path(wfPath, "workflow.xml")))).getValue();
+ }
+
+ /**
+ * Builds replication coordinators for the same FS feed on two different
+ * target clusters (alpha, beta) and verifies each coordinator gets its own
+ * validity window and partition-expanded source paths.
+ */
+ @Test
+ public void testReplicationCoordsForFSStorageWithMultipleTargets() throws Exception {
+ OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(fsReplFeed, Tag.REPLICATION);
+
+ List<Properties> alphaCoords = builder.buildCoords(alphaTrgCluster, new Path("/alpha/falcon/"));
+ final COORDINATORAPP alphaCoord = getCoordinator(trgMiniDFS,
+ alphaCoords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));
+ Assert.assertEquals(alphaCoord.getStart(), "2012-10-01T12:05Z");
+ Assert.assertEquals(alphaCoord.getEnd(), "2012-10-01T12:11Z");
+
+ String pathsWithPartitions = getPathsWithPartitions(srcCluster, alphaTrgCluster, fsReplFeed);
+ assertReplCoord(alphaCoord, fsReplFeed, alphaTrgCluster.getName(), pathsWithPartitions);
+
+ List<Properties> betaCoords = builder.buildCoords(betaTrgCluster, new Path("/beta/falcon/"));
+ final COORDINATORAPP betaCoord = getCoordinator(trgMiniDFS,
+ betaCoords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));
+ Assert.assertEquals(betaCoord.getStart(), "2012-10-01T12:10Z");
+ Assert.assertEquals(betaCoord.getEnd(), "2012-10-01T12:26Z");
+
+ pathsWithPartitions = getPathsWithPartitions(srcCluster, betaTrgCluster, fsReplFeed);
+ assertReplCoord(betaCoord, fsReplFeed, betaTrgCluster.getName(), pathsWithPartitions);
+ }
+
+ /**
+ * Computes the expected source path expression for replication: the
+ * coordinator's dataIn('input') followed by the normalized source/target
+ * partition expressions, with duplicate and trailing slashes removed.
+ */
+ private String getPathsWithPartitions(Cluster sourceCluster, Cluster targetCluster,
+ Feed aFeed) throws FalconException {
+ String srcPart = FeedHelper.normalizePartitionExpression(
+ FeedHelper.getCluster(aFeed, sourceCluster.getName()).getPartition());
+ srcPart = FeedHelper.evaluateClusterExp(sourceCluster, srcPart);
+ String targetPart = FeedHelper.normalizePartitionExpression(
+ FeedHelper.getCluster(aFeed, targetCluster.getName()).getPartition());
+ targetPart = FeedHelper.evaluateClusterExp(targetCluster, targetPart);
+
+ String pathsWithPartitions = "${coord:dataIn('input')}/"
+ + FeedHelper.normalizePartitionExpression(srcPart, targetPart);
+ String parts = pathsWithPartitions.replaceAll("//+", "/");
+ parts = StringUtils.stripEnd(parts, "/");
+ return parts;
+ }
+
+ /**
+ * Verifies one replication coordinator: its start/end match the feed
+ * cluster's validity, its workflow matches the expected node layout, the
+ * replication action carries 13 args, and the workflow configuration holds
+ * the expected replication properties.
+ * NOTE(review): logDir is always checked against trgCluster, not the
+ * cluster named by clusterName - confirm that is intended.
+ */
+ private void assertReplCoord(COORDINATORAPP coord, Feed aFeed, String clusterName,
+ String pathsWithPartitions) throws JAXBException, IOException {
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(aFeed, clusterName);
+ Date startDate = feedCluster.getValidity().getStart();
+ Assert.assertEquals(coord.getStart(), SchemaHelper.formatDateUTC(startDate));
+
+ Date endDate = feedCluster.getValidity().getEnd();
+ Assert.assertEquals(coord.getEnd(), SchemaHelper.formatDateUTC(endDate));
+
+ WORKFLOWAPP workflow = getWorkflowapp(coord);
+ assertWorkflowDefinition(fsReplFeed, workflow);
+
+ List<Object> actions = workflow.getDecisionOrForkOrJoin();
+ // NOTE(review): leftover debug print in the test; consider removing.
+ System.out.println("actions = " + actions);
+
+ ACTION replicationActionNode = (ACTION) actions.get(4);
+ Assert.assertEquals(replicationActionNode.getName(), "replication");
+
+ JAVA replication = replicationActionNode.getJava();
+ List<String> args = replication.getArg();
+ Assert.assertEquals(args.size(), 13);
+
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ Assert.assertEquals(props.get("sourceRelativePaths"), pathsWithPartitions);
+ Assert.assertEquals(props.get("sourceRelativePaths"), "${coord:dataIn('input')}/" + srcCluster.getColo());
+ Assert.assertEquals(props.get("distcpSourcePaths"), "${coord:dataIn('input')}");
+ Assert.assertEquals(props.get("distcpTargetPaths"), "${coord:dataOut('output')}");
+ Assert.assertEquals(props.get("falconFeedStorageType"), Storage.TYPE.FILESYSTEM.name());
+ Assert.assertEquals(props.get("maxMaps"), "33");
+ Assert.assertEquals(props.get("mapBandwidthKB"), "2048");
+ Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, aFeed));
+ }
+
+ /** Asserts the replication parent workflow's name and the exact order of its ten nodes. */
+ public void assertWorkflowDefinition(Feed aFeed, WORKFLOWAPP parentWorkflow) {
+ Assert.assertEquals(EntityUtil.getWorkflowName(Tag.REPLICATION, aFeed).toString(), parentWorkflow.getName());
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+ Assert.assertEquals("should-record", ((DECISION) decisionOrForkOrJoin.get(0)).getName());
+ Assert.assertEquals("recordsize", ((ACTION) decisionOrForkOrJoin.get(1)).getName());
+ Assert.assertEquals("replication-decision", ((DECISION) decisionOrForkOrJoin.get(2)).getName());
+ Assert.assertEquals("table-export", ((ACTION) decisionOrForkOrJoin.get(3)).getName());
+ Assert.assertEquals("replication", ((ACTION) decisionOrForkOrJoin.get(4)).getName());
+ Assert.assertEquals("post-replication-decision", ((DECISION) decisionOrForkOrJoin.get(5)).getName());
+ Assert.assertEquals("table-import", ((ACTION) decisionOrForkOrJoin.get(6)).getName());
+ Assert.assertEquals("cleanup-table-staging-dir", ((ACTION) decisionOrForkOrJoin.get(7)).getName());
+ Assert.assertEquals("succeeded-post-processing", ((ACTION) decisionOrForkOrJoin.get(8)).getName());
+ Assert.assertEquals("failed-post-processing", ((ACTION) decisionOrForkOrJoin.get(9)).getName());
+ }
+
+ /** Supplies the authentication modes ("simple", "kerberos") for the secure-option tests. */
+ @DataProvider(name = "secureOptions")
+ private Object[][] createOptions() {
+ return new Object[][] {
+ {"simple"},
+ {"kerberos"},
+ };
+ }
+
+ /**
+ * Builds the replication coordinator for the HCatalog (table) feed under
+ * both simple and kerberos authentication and verifies: the coordinator's
+ * datasets use hcat URIs derived from each cluster's registry endpoint,
+ * the staged scripts/conf files exist on DFS, the workflow configuration
+ * carries the table replication/staging/late-data/post-processing
+ * properties, and HCat credentials are wired per action when secure.
+ */
+ @Test (dataProvider = "secureOptions")
+ public void testReplicationCoordsForTableStorage(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(tableFeed, Tag.REPLICATION);
+ List<Properties> coords = builder.buildCoords(trgCluster, new Path("/projects/falcon/"));
+ COORDINATORAPP coord = getCoordinator(trgMiniDFS, coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));
+
+ Assert.assertEquals("2010-01-01T00:40Z", coord.getStart());
+ Assert.assertEquals(getWorkflowAppPath(),
+ coord.getAction().getWorkflow().getAppPath());
+ Assert.assertEquals("FALCON_FEED_REPLICATION_" + tableFeed.getName() + "_"
+ + srcCluster.getName(), coord.getName());
+ Assert.assertEquals("${coord:minutes(20)}", coord.getFrequency());
+
+ SYNCDATASET inputDataset = (SYNCDATASET) coord.getDatasets()
+ .getDatasetOrAsyncDataset().get(0);
+ Assert.assertEquals("${coord:minutes(20)}", inputDataset.getFrequency());
+ Assert.assertEquals("input-dataset", inputDataset.getName());
+
+ // Dataset URIs are the registry endpoint with scheme thrift -> hcat.
+ String sourceRegistry = ClusterHelper.getInterface(srcCluster, Interfacetype.REGISTRY).getEndpoint();
+ sourceRegistry = sourceRegistry.replace("thrift", "hcat");
+ Assert.assertEquals(inputDataset.getUriTemplate(),
+ sourceRegistry + "/source_db/source_clicks_table/ds=${YEAR}${MONTH}${DAY};region=${region}");
+
+ SYNCDATASET outputDataset = (SYNCDATASET) coord.getDatasets()
+ .getDatasetOrAsyncDataset().get(1);
+ Assert.assertEquals(outputDataset.getFrequency(), "${coord:minutes(20)}");
+ Assert.assertEquals("output-dataset", outputDataset.getName());
+
+ String targetRegistry = ClusterHelper.getInterface(trgCluster, Interfacetype.REGISTRY).getEndpoint();
+ targetRegistry = targetRegistry.replace("thrift", "hcat");
+ Assert.assertEquals(outputDataset.getUriTemplate(),
+ targetRegistry + "/target_db/target_clicks_table/ds=${YEAR}${MONTH}${DAY};region=${region}");
+
+ String inEventName =coord.getInputEvents().getDataIn().get(0).getName();
+ String inEventDataset =coord.getInputEvents().getDataIn().get(0).getDataset();
+ String inEventInstance = coord.getInputEvents().getDataIn().get(0).getInstance().get(0);
+ Assert.assertEquals("input", inEventName);
+ Assert.assertEquals("input-dataset", inEventDataset);
+ Assert.assertEquals("${now(0,-40)}", inEventInstance);
+
+ String outEventInstance = coord.getOutputEvents().getDataOut().get(0).getInstance();
+ Assert.assertEquals("${now(0,-40)}", outEventInstance);
+
+ // assert FS staging area
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ final FileSystem fs = trgMiniDFS.getFileSystem();
+ Assert.assertTrue(fs.exists(new Path(wfPath + "/scripts")));
+ Assert.assertTrue(fs.exists(new Path(wfPath + "/scripts/falcon-table-export.hql")));
+ Assert.assertTrue(fs.exists(new Path(wfPath + "/scripts/falcon-table-import.hql")));
+
+ Assert.assertTrue(fs.exists(new Path(wfPath + "/conf")));
+ Assert.assertTrue(fs.exists(new Path(wfPath + "/conf/falcon-source-hive-site.xml")));
+ Assert.assertTrue(fs.exists(new Path(wfPath + "/conf/falcon-target-hive-site.xml")));
+
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ final CatalogStorage srcStorage = (CatalogStorage) FeedHelper.createStorage(srcCluster, tableFeed);
+ final CatalogStorage trgStorage = (CatalogStorage) FeedHelper.createStorage(trgCluster, tableFeed);
+
+ // verify the replication param that feed replicator depends on
+ Assert.assertEquals(props.get("sourceRelativePaths"), "IGNORE");
+
+ Assert.assertTrue(props.containsKey("distcpSourcePaths"));
+ Assert.assertEquals(props.get("distcpSourcePaths"),
+ FeedHelper.getStagingPath(srcCluster, tableFeed, srcStorage, Tag.REPLICATION,
+ "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}" + "/" + trgCluster.getName()));
+
+ Assert.assertTrue(props.containsKey("distcpTargetPaths"));
+ Assert.assertEquals(props.get("distcpTargetPaths"),
+ FeedHelper.getStagingPath(trgCluster, tableFeed, trgStorage, Tag.REPLICATION,
+ "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}" + "/" + trgCluster.getName()));
+
+ Assert.assertEquals(props.get("falconFeedStorageType"), Storage.TYPE.TABLE.name());
+
+ // verify table props
+ assertTableStorageProperties(srcCluster, srcStorage, props, "falconSource");
+ assertTableStorageProperties(trgCluster, trgStorage, props, "falconTarget");
+
+ // verify the late data params
+ Assert.assertEquals(props.get("falconInputFeeds"), tableFeed.getName());
+ Assert.assertEquals(props.get("falconInPaths"), "${coord:dataIn('input')}");
+ Assert.assertEquals(props.get("falconInputFeedStorageTypes"), Storage.TYPE.TABLE.name());
+ Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, tableFeed));
+
+ // verify the post processing params
+ Assert.assertEquals(props.get("feedNames"), tableFeed.getName());
+ Assert.assertEquals(props.get("feedInstancePaths"), "${coord:dataOut('output')}");
+
+ Assert.assertTrue(Storage.TYPE.TABLE == FeedHelper.getStorageType(tableFeed, trgCluster));
+ assertReplicationHCatCredentials(getWorkflowapp(coord),
+ coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", ""));
+ }
+
+ /**
+ * Verifies HCatalog credential wiring in the replication workflow: the
+ * staged hive-site files exist; when security is enabled the workflow
+ * declares two credentials and the recordsize/table-export actions use
+ * the source credential while table-import uses the target credential;
+ * when disabled no action carries a cred attribute.
+ */
+ private void assertReplicationHCatCredentials(WORKFLOWAPP wf, String wfPath) throws IOException {
+ FileSystem fs = trgMiniDFS.getFileSystem();
+
+ Path hiveConfPath = new Path(wfPath, "conf/falcon-source-hive-site.xml");
+ Assert.assertTrue(fs.exists(hiveConfPath));
+
+ hiveConfPath = new Path(wfPath, "conf/falcon-target-hive-site.xml");
+ Assert.assertTrue(fs.exists(hiveConfPath));
+
+ boolean isSecurityEnabled = SecurityUtil.isSecurityEnabled();
+ if (isSecurityEnabled) {
+ Assert.assertNotNull(wf.getCredentials());
+ Assert.assertEquals(2, wf.getCredentials().getCredential().size());
+ }
+
+ List<Object> actions = wf.getDecisionOrForkOrJoin();
+ for (Object obj : actions) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+ ACTION action = (ACTION) obj;
+ String actionName = action.getName();
+
+ if (!isSecurityEnabled) {
+ Assert.assertNull(action.getCred());
+ }
+
+ if ("recordsize".equals(actionName)) {
+ Assert.assertEquals(action.getJava().getJobXml(), "${wf:appPath()}/conf/falcon-source-hive-site.xml");
+ if (isSecurityEnabled) {
+ Assert.assertNotNull(action.getCred());
+ Assert.assertEquals(action.getCred(), "falconSourceHiveAuth");
+ }
+ } else if ("table-export".equals(actionName) && isSecurityEnabled) {
+ Assert.assertNotNull(action.getCred());
+ Assert.assertEquals(action.getCred(), "falconSourceHiveAuth");
+ } else if ("table-import".equals(actionName) && isSecurityEnabled) {
+ Assert.assertNotNull(action.getCred());
+ Assert.assertEquals(action.getCred(), "falconTargetHiveAuth");
+ }
+ }
+ }
+
+ /**
+ * Asserts the prefix-scoped (falconSource/falconTarget) table-storage
+ * properties: namenode, jobtracker, hcat endpoint, database, table,
+ * and the hive-export partition expression.
+ */
+ private void assertTableStorageProperties(Cluster cluster, CatalogStorage tableStorage,
+ Map<String, String> props, String prefix) {
+ Assert.assertEquals(props.get(prefix + "NameNode"), ClusterHelper.getStorageUrl(cluster));
+ Assert.assertEquals(props.get(prefix + "JobTracker"), ClusterHelper.getMREndPoint(cluster));
+ Assert.assertEquals(props.get(prefix + "HcatNode"), tableStorage.getCatalogUrl());
+
+ Assert.assertEquals(props.get(prefix + "Database"), tableStorage.getDatabase());
+ Assert.assertEquals(props.get(prefix + "Table"), tableStorage.getTable());
+ Assert.assertEquals(props.get(prefix + "Partition"), "${coord:dataInPartitions('input', 'hive-export')}");
+ }
+
+ @Test
+ public void testRetentionCoords() throws Exception {
+ org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, srcCluster.getName());
+ final Calendar instance = Calendar.getInstance();
+ instance.roll(Calendar.YEAR, 1);
+ cluster.getValidity().setEnd(instance.getTime());
+
+ OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(feed, Tag.RETENTION);
+ List<Properties> coords = builder.buildCoords(srcCluster, new Path("/projects/falcon/"));
+ COORDINATORAPP coord = getCoordinator(srcMiniDFS, coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));
+
+ Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/RETENTION");
+ Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + feed.getName());
+ Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}");
+
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ String feedDataPath = props.get("feedDataPath");
+ String storageType = props.get("falconFeedStorageType");
+
+ // verify the param that feed evictor depends on
+ Assert.assertEquals(storageType, Storage.TYPE.FILESYSTEM.name());
+
+ final Storage storage = FeedHelper.createStorage(cluster, feed);
+ if (feedDataPath != null) {
+ Assert.assertEquals(feedDataPath, storage.getUriTemplate()
+ .replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX));
+ }
+
+ if (storageType != null) {
+ Assert.assertEquals(storageType, storage.getType().name());
+ }
+
+ // verify the post processing params
+ Assert.assertEquals(props.get("feedNames"), feed.getName());
+ Assert.assertEquals(props.get("feedInstancePaths"), "IGNORE");
+ Assert.assertEquals(props.get("logDir"), getLogPath(srcCluster, feed));
+
+ assertWorkflowRetries(coord);
+ }
+
+ @Test (dataProvider = "secureOptions")
+ public void testRetentionCoordsForTable(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(tableFeed, trgCluster.getName());
+ final Calendar instance = Calendar.getInstance();
+ instance.roll(Calendar.YEAR, 1);
+ cluster.getValidity().setEnd(instance.getTime());
+
+ OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(tableFeed, Tag.RETENTION);
+ List<Properties> coords = builder.buildCoords(trgCluster, new Path("/projects/falcon/"));
+ COORDINATORAPP coord = getCoordinator(trgMiniDFS, coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));
+
+ Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/RETENTION");
+ Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + tableFeed.getName());
+ Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}");
+
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ String feedDataPath = props.get("feedDataPath");
+ String storageType = props.get("falconFeedStorageType");
+
+ // verify the param that feed evictor depends on
+ Assert.assertEquals(storageType, Storage.TYPE.TABLE.name());
+
+ final Storage storage = FeedHelper.createStorage(cluster, tableFeed);
+ if (feedDataPath != null) {
+ Assert.assertEquals(feedDataPath, storage.getUriTemplate()
+ .replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX));
+ }
+
+ if (storageType != null) {
+ Assert.assertEquals(storageType, storage.getType().name());
+ }
+
+ // verify the post processing params
+ Assert.assertEquals(props.get("feedNames"), tableFeed.getName());
+ Assert.assertEquals(props.get("feedInstancePaths"), "IGNORE");
+ Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, tableFeed));
+
+ assertWorkflowRetries(coord);
+
+ Assert.assertTrue(Storage.TYPE.TABLE == FeedHelper.getStorageType(tableFeed, trgCluster));
+ assertHCatCredentials(getWorkflowapp(coord),
+ coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", ""));
+ }
+
+ private void assertHCatCredentials(WORKFLOWAPP wf, String wfPath) throws IOException {
+ Path hiveConfPath = new Path(wfPath, "conf/hive-site.xml");
+ FileSystem fs = trgMiniDFS.getFileSystem();
+ Assert.assertTrue(fs.exists(hiveConfPath));
+
+ if (SecurityUtil.isSecurityEnabled()) {
+ Assert.assertNotNull(wf.getCredentials());
+ Assert.assertEquals(1, wf.getCredentials().getCredential().size());
+ }
+
+ List<Object> actions = wf.getDecisionOrForkOrJoin();
+ for (Object obj : actions) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+ ACTION action = (ACTION) obj;
+ String actionName = action.getName();
+
+ if ("eviction".equals(actionName)) {
+ Assert.assertEquals(action.getJava().getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
+ if (SecurityUtil.isSecurityEnabled()) {
+ Assert.assertNotNull(action.getCred());
+ Assert.assertEquals(action.getCred(), "falconHiveAuth");
+ }
+ }
+ }
+ }
+
+ private String getLogPath(Cluster aCluster, Feed aFeed) {
+ Path logPath = EntityUtil.getLogPath(aCluster, aFeed);
+ return (logPath.toUri().getScheme() == null ? "${nameNode}" : "") + logPath;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/java/org/apache/falcon/oozie/process/AbstractTestBase.java
----------------------------------------------------------------------
diff --git a/oozie/src/test/java/org/apache/falcon/oozie/process/AbstractTestBase.java b/oozie/src/test/java/org/apache/falcon/oozie/process/AbstractTestBase.java
new file mode 100644
index 0000000..54a2ea7
--- /dev/null
+++ b/oozie/src/test/java/org/apache/falcon/oozie/process/AbstractTestBase.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.cluster.util.EmbeddedCluster;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.cluster.Interfacetype;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.oozie.bundle.BUNDLEAPP;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.transform.stream.StreamSource;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.util.Collection;
+
+/**
+ * Base for falcon unit tests involving configuration store.
+ */
+public class AbstractTestBase {
+ protected Entity storeEntity(EntityType type, String name, String resource, String writeEndpoint) throws Exception {
+ Unmarshaller unmarshaller = type.getUnmarshaller();
+ ConfigurationStore store = ConfigurationStore.get();
+ switch (type) {
+ case CLUSTER:
+ Cluster cluster = (Cluster) unmarshaller.unmarshal(this.getClass().getResource(resource));
+ if (name != null){
+ store.remove(type, name);
+ cluster.setName(name);
+ }
+ store.publish(type, cluster);
+
+ if (writeEndpoint != null) {
+ ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(writeEndpoint);
+ FileSystem fs = new Path(writeEndpoint).getFileSystem(EmbeddedCluster.newConfiguration());
+ fs.create(
+ new Path(ClusterHelper.getLocation(cluster, "working"), "libext/FEED/retention/ext.jar")).close();
+ fs.create(
+ new Path(ClusterHelper.getLocation(cluster, "working"), "libext/FEED/replication/ext.jar")).close();
+ }
+
+ return cluster;
+
+ case FEED:
+ Feed feed = (Feed) unmarshaller.unmarshal(this.getClass().getResource(resource));
+ if (name != null) {
+ store.remove(type, name);
+ feed.setName(name);
+ }
+ store.publish(type, feed);
+ return feed;
+
+ case PROCESS:
+ Process process = (Process) unmarshaller.unmarshal(this.getClass().getResource(resource));
+ if (name != null) {
+ store.remove(type, name);
+ process.setName(name);
+ }
+ store.publish(type, process);
+ return process;
+
+ default:
+ }
+
+ throw new IllegalArgumentException("Unhandled type: " + type);
+ }
+
+ protected COORDINATORAPP getCoordinator(FileSystem fs, Path path) throws Exception {
+ String coordStr = readFile(fs, new Path(path, "coordinator.xml"));
+
+ Unmarshaller unmarshaller = JAXBContext.newInstance(COORDINATORAPP.class).createUnmarshaller();
+ SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
+ Schema schema = schemaFactory.newSchema(this.getClass().getResource("/oozie-coordinator-0.3.xsd"));
+ unmarshaller.setSchema(schema);
+ JAXBElement<COORDINATORAPP> jaxbBundle = unmarshaller.unmarshal(
+ new StreamSource(new ByteArrayInputStream(coordStr.trim().getBytes())), COORDINATORAPP.class);
+ return jaxbBundle.getValue();
+ }
+
+ protected BUNDLEAPP getBundle(FileSystem fs, Path path) throws Exception {
+ String bundleStr = readFile(fs, new Path(path, "bundle.xml"));
+
+ Unmarshaller unmarshaller = JAXBContext.newInstance(BUNDLEAPP.class).createUnmarshaller();
+ SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
+ Schema schema = schemaFactory.newSchema(this.getClass().getResource("/oozie-bundle-0.1.xsd"));
+ unmarshaller.setSchema(schema);
+ JAXBElement<BUNDLEAPP> jaxbBundle = unmarshaller.unmarshal(
+ new StreamSource(new ByteArrayInputStream(bundleStr.trim().getBytes())), BUNDLEAPP.class);
+ return jaxbBundle.getValue();
+ }
+
+ protected String readFile(FileSystem fs, Path path) throws Exception {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)));
+ String line;
+ StringBuilder contents = new StringBuilder();
+ while ((line = reader.readLine()) != null) {
+ contents.append(line);
+ }
+ return contents.toString();
+ }
+
+ protected void cleanupStore() throws FalconException {
+ ConfigurationStore store = ConfigurationStore.get();
+ for (EntityType type : EntityType.values()) {
+ Collection<String> entities = store.getEntities(type);
+ for (String entity : entities) {
+ store.remove(type, entity);
+ }
+ }
+ }
+}
[8/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/main/resources/config/workflow/retention-workflow.xml
----------------------------------------------------------------------
diff --git a/feed/src/main/resources/config/workflow/retention-workflow.xml b/feed/src/main/resources/config/workflow/retention-workflow.xml
deleted file mode 100644
index 5138865..0000000
--- a/feed/src/main/resources/config/workflow/retention-workflow.xml
+++ /dev/null
@@ -1,208 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-feed-parent-workflow'>
- <start to='eviction'/>
- <action name="eviction">
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- <!-- HCatalog jars -->
- <property>
- <name>oozie.action.sharelib.for.java</name>
- <value>hcatalog</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.retention.FeedEvictor</main-class>
- <arg>-feedBasePath</arg>
- <arg>${feedDataPath}</arg>
- <arg>-falconFeedStorageType</arg>
- <arg>${falconFeedStorageType}</arg>
- <arg>-retentionType</arg>
- <arg>instance</arg>
- <arg>-retentionLimit</arg>
- <arg>${limit}</arg>
- <arg>-timeZone</arg>
- <arg>${timeZone}</arg>
- <arg>-frequency</arg>
- <arg>${frequency}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
- </java>
- <ok to="succeeded-post-processing"/>
- <error to="failed-post-processing"/>
- </action>
- <action name='succeeded-post-processing'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
- <arg>-cluster</arg>
- <arg>${cluster}</arg>
- <arg>-entityType</arg>
- <arg>${entityType}</arg>
- <arg>-entityName</arg>
- <arg>${entityName}</arg>
- <arg>-nominalTime</arg>
- <arg>${nominalTime}</arg>
- <arg>-operation</arg>
- <arg>DELETE</arg>
- <arg>-workflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-runId</arg>
- <arg>${wf:run()}</arg>
- <arg>-status</arg>
- <arg>SUCCEEDED</arg>
- <arg>-timeStamp</arg>
- <arg>${timeStamp}</arg>
- <arg>-brokerImplClass</arg>
- <arg>${wf:conf("broker.impl.class")}</arg>
- <arg>-brokerUrl</arg>
- <arg>${wf:conf("broker.url")}</arg>
- <arg>-userBrokerImplClass</arg>
- <arg>${userBrokerImplClass}</arg>
- <arg>-userBrokerUrl</arg>
- <arg>${userBrokerUrl}</arg>
- <arg>-brokerTTL</arg>
- <arg>${wf:conf("broker.ttlInMins")}</arg>
- <arg>-feedNames</arg>
- <arg>${feedNames}</arg>
- <arg>-feedInstancePaths</arg>
- <arg>${feedInstancePaths}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
- <arg>-workflowEngineUrl</arg>
- <arg>${workflowEngineUrl}</arg>
- <arg>-subflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-logDir</arg>
- <arg>${logDir}/job-${nominalTime}/</arg>
- <arg>-workflowUser</arg>
- <arg>${wf:user()}</arg>
- <arg>-userWorkflowName</arg>
- <arg>${userWorkflowName}</arg>
- <arg>-userWorkflowVersion</arg>
- <arg>${userWorkflowVersion}</arg>
- <arg>-userWorkflowEngine</arg>
- <arg>${userWorkflowEngine}</arg>
- <arg>-falconInputFeeds</arg>
- <arg>${falconInputFeeds}</arg>
- <arg>-falconInputPaths</arg>
- <arg>${falconInPaths}</arg>
- <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
- <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
- <file>${wf:conf("falcon.libpath")}/jms.jar</file>
- <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
- <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
- <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
- </java>
- <ok to="end"/>
- <error to="fail"/>
- </action>
- <action name='failed-post-processing'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
- <arg>-cluster</arg>
- <arg>${cluster}</arg>
- <arg>-entityType</arg>
- <arg>${entityType}</arg>
- <arg>-entityName</arg>
- <arg>${entityName}</arg>
- <arg>-nominalTime</arg>
- <arg>${nominalTime}</arg>
- <arg>-operation</arg>
- <arg>DELETE</arg>
- <arg>-workflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-runId</arg>
- <arg>${wf:run()}</arg>
- <arg>-status</arg>
- <arg>FAILED</arg>
- <arg>-timeStamp</arg>
- <arg>${timeStamp}</arg>
- <arg>-brokerImplClass</arg>
- <arg>${wf:conf("broker.impl.class")}</arg>
- <arg>-brokerUrl</arg>
- <arg>${wf:conf("broker.url")}</arg>
- <arg>-userBrokerImplClass</arg>
- <arg>${userBrokerImplClass}</arg>
- <arg>-userBrokerUrl</arg>
- <arg>${userBrokerUrl}</arg>
- <arg>-brokerTTL</arg>
- <arg>${wf:conf("broker.ttlInMins")}</arg>
- <arg>-feedNames</arg>
- <arg>${feedNames}</arg>
- <arg>-feedInstancePaths</arg>
- <arg>${feedInstancePaths}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
- <arg>-workflowEngineUrl</arg>
- <arg>${workflowEngineUrl}</arg>
- <arg>-subflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-logDir</arg>
- <arg>${logDir}/job-${nominalTime}/</arg>
- <arg>-workflowUser</arg>
- <arg>${wf:user()}</arg>
- <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
- <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
- <file>${wf:conf("falcon.libpath")}/jms.jar</file>
- <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
- <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
- <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
- </java>
- <ok to="fail"/>
- <error to="fail"/>
- </action>
- <kill name="fail">
- <message>
- Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- </message>
- </kill>
- <end name='end'/>
-</workflow-app>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/java/org/apache/falcon/converter/OozieFeedWorkflowBuilderTest.java
----------------------------------------------------------------------
diff --git a/feed/src/test/java/org/apache/falcon/converter/OozieFeedWorkflowBuilderTest.java b/feed/src/test/java/org/apache/falcon/converter/OozieFeedWorkflowBuilderTest.java
deleted file mode 100644
index d793e65..0000000
--- a/feed/src/test/java/org/apache/falcon/converter/OozieFeedWorkflowBuilderTest.java
+++ /dev/null
@@ -1,669 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.falcon.converter;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.falcon.FalconException;
-import org.apache.falcon.Tag;
-import org.apache.falcon.cluster.util.EmbeddedCluster;
-import org.apache.falcon.entity.CatalogStorage;
-import org.apache.falcon.entity.ClusterHelper;
-import org.apache.falcon.entity.EntityUtil;
-import org.apache.falcon.entity.FeedHelper;
-import org.apache.falcon.entity.Storage;
-import org.apache.falcon.entity.store.ConfigurationStore;
-import org.apache.falcon.entity.v0.Entity;
-import org.apache.falcon.entity.v0.EntityType;
-import org.apache.falcon.entity.v0.SchemaHelper;
-import org.apache.falcon.entity.v0.cluster.Cluster;
-import org.apache.falcon.entity.v0.cluster.Interfacetype;
-import org.apache.falcon.entity.v0.feed.Feed;
-import org.apache.falcon.oozie.coordinator.CONFIGURATION.Property;
-import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
-import org.apache.falcon.oozie.coordinator.SYNCDATASET;
-import org.apache.falcon.oozie.workflow.ACTION;
-import org.apache.falcon.oozie.workflow.DECISION;
-import org.apache.falcon.oozie.workflow.JAVA;
-import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
-import org.apache.falcon.security.CurrentUser;
-import org.apache.falcon.security.SecurityUtil;
-import org.apache.falcon.util.StartupProperties;
-import org.apache.falcon.workflow.OozieFeedWorkflowBuilder;
-import org.apache.falcon.workflow.OozieWorkflowBuilder;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.testng.Assert;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.JAXBException;
-import javax.xml.bind.Unmarshaller;
-import java.io.IOException;
-import java.util.Calendar;
-import java.util.Collection;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Tests for Oozie workflow definition for feed replication & retention.
- */
-public class OozieFeedWorkflowBuilderTest {
- private EmbeddedCluster srcMiniDFS;
- private EmbeddedCluster trgMiniDFS;
- private final ConfigurationStore store = ConfigurationStore.get();
- private Cluster srcCluster;
- private Cluster trgCluster;
- private Cluster alphaTrgCluster;
- private Cluster betaTrgCluster;
- private Feed feed;
- private Feed tableFeed;
- private Feed fsReplFeed;
-
- private static final String SRC_CLUSTER_PATH = "/src-cluster.xml";
- private static final String TRG_CLUSTER_PATH = "/trg-cluster.xml";
- private static final String FEED = "/feed.xml";
- private static final String TABLE_FEED = "/table-replication-feed.xml";
- private static final String FS_REPLICATION_FEED = "/fs-replication-feed.xml";
-
- @BeforeClass
- public void setUpDFS() throws Exception {
- CurrentUser.authenticate("falcon");
-
- srcMiniDFS = EmbeddedCluster.newCluster("cluster1");
- String srcHdfsUrl = srcMiniDFS.getConf().get("fs.default.name");
-
- trgMiniDFS = EmbeddedCluster.newCluster("cluster2");
- String trgHdfsUrl = trgMiniDFS.getConf().get("fs.default.name");
-
- cleanupStore();
-
- org.apache.falcon.entity.v0.cluster.Property property =
- new org.apache.falcon.entity.v0.cluster.Property();
- property.setName(OozieWorkflowBuilder.METASTORE_KERBEROS_PRINCIPAL);
- property.setValue("hive/_HOST");
-
- srcCluster = (Cluster) storeEntity(EntityType.CLUSTER, SRC_CLUSTER_PATH, srcHdfsUrl);
- srcCluster.getProperties().getProperties().add(property);
-
- trgCluster = (Cluster) storeEntity(EntityType.CLUSTER, TRG_CLUSTER_PATH, trgHdfsUrl);
- trgCluster.getProperties().getProperties().add(property);
-
- alphaTrgCluster = (Cluster) storeEntity(EntityType.CLUSTER, "/trg-cluster-alpha.xml", trgHdfsUrl);
- betaTrgCluster = (Cluster) storeEntity(EntityType.CLUSTER, "/trg-cluster-beta.xml", trgHdfsUrl);
-
- feed = (Feed) storeEntity(EntityType.FEED, FEED, null);
- fsReplFeed = (Feed) storeEntity(EntityType.FEED, FS_REPLICATION_FEED, null);
- tableFeed = (Feed) storeEntity(EntityType.FEED, TABLE_FEED, null);
- }
-
- protected Entity storeEntity(EntityType type, String template, String writeEndpoint) throws Exception {
- Unmarshaller unmarshaller = type.getUnmarshaller();
- Entity entity = (Entity) unmarshaller
- .unmarshal(OozieFeedWorkflowBuilderTest.class.getResource(template));
- store.publish(type, entity);
-
- if (type == EntityType.CLUSTER) {
- Cluster cluster = (Cluster) entity;
- ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(writeEndpoint);
- FileSystem fs = new Path(writeEndpoint).getFileSystem(EmbeddedCluster.newConfiguration());
- fs.create(new Path(ClusterHelper.getLocation(cluster, "working"), "libext/FEED/retention/ext.jar")).close();
- fs.create(
- new Path(ClusterHelper.getLocation(cluster, "working"), "libext/FEED/replication/ext.jar")).close();
- }
- return entity;
- }
-
- protected void cleanupStore() throws FalconException {
- for (EntityType type : EntityType.values()) {
- Collection<String> entities = store.getEntities(type);
- for (String entity : entities) {
- store.remove(type, entity);
- }
- }
- }
-
- @AfterClass
- public void stopDFS() {
- srcMiniDFS.shutdown();
- trgMiniDFS.shutdown();
- }
-
- @Test
- public void testReplicationCoordsForFSStorage() throws Exception {
- OozieFeedWorkflowBuilder builder = new OozieFeedWorkflowBuilder(feed);
- List<COORDINATORAPP> coords = builder.getCoordinators(trgCluster, new Path("/projects/falcon/"));
- //Assert retention coord
- COORDINATORAPP coord = coords.get(0);
- assertLibExtensions(coord, "retention");
-
- //Assert replication coord
- coord = coords.get(1);
- Assert.assertEquals("2010-01-01T00:40Z", coord.getStart());
- Assert.assertEquals(getWorkflowAppPath(), coord.getAction().getWorkflow().getAppPath());
- Assert.assertEquals("FALCON_FEED_REPLICATION_" + feed.getName() + "_"
- + srcCluster.getName(), coord.getName());
- Assert.assertEquals("${coord:minutes(20)}", coord.getFrequency());
- SYNCDATASET inputDataset = (SYNCDATASET) coord.getDatasets()
- .getDatasetOrAsyncDataset().get(0);
- SYNCDATASET outputDataset = (SYNCDATASET) coord.getDatasets()
- .getDatasetOrAsyncDataset().get(1);
-
- Assert.assertEquals("${coord:minutes(20)}", inputDataset.getFrequency());
- Assert.assertEquals("input-dataset", inputDataset.getName());
- Assert.assertEquals(
- ClusterHelper.getReadOnlyStorageUrl(srcCluster)
- + "/examples/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}",
- inputDataset.getUriTemplate());
-
- Assert.assertEquals("${coord:minutes(20)}",
- outputDataset.getFrequency());
- Assert.assertEquals("output-dataset", outputDataset.getName());
- Assert.assertEquals(ClusterHelper.getStorageUrl(trgCluster)
- + "/examples/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}",
- outputDataset.getUriTemplate());
- String inEventName =coord.getInputEvents().getDataIn().get(0).getName();
- String inEventDataset =coord.getInputEvents().getDataIn().get(0).getDataset();
- String inEventInstance = coord.getInputEvents().getDataIn().get(0).getInstance().get(0);
- Assert.assertEquals("input", inEventName);
- Assert.assertEquals("input-dataset", inEventDataset);
- Assert.assertEquals("${now(0,-40)}", inEventInstance);
-
- String outEventInstance = coord.getOutputEvents().getDataOut().get(0).getInstance();
- Assert.assertEquals("${now(0,-40)}", outEventInstance);
-
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- // verify the replication param that feed replicator depends on
- String pathsWithPartitions = getPathsWithPartitions(srcCluster, trgCluster, feed);
- Assert.assertEquals(props.get("sourceRelativePaths"), pathsWithPartitions);
-
- Assert.assertEquals(props.get("sourceRelativePaths"), "${coord:dataIn('input')}");
- Assert.assertEquals(props.get("distcpSourcePaths"), "${coord:dataIn('input')}");
- Assert.assertEquals(props.get("distcpTargetPaths"), "${coord:dataOut('output')}");
- Assert.assertEquals(props.get("falconFeedStorageType"), Storage.TYPE.FILESYSTEM.name());
-
- // verify the late data params
- Assert.assertEquals(props.get("falconInputFeeds"), feed.getName());
- Assert.assertEquals(props.get("falconInPaths"), "${coord:dataIn('input')}");
- Assert.assertEquals(props.get("falconInPaths"), pathsWithPartitions);
- Assert.assertEquals(props.get("falconInputFeedStorageTypes"), Storage.TYPE.FILESYSTEM.name());
- Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, feed));
-
- // verify the post processing params
- Assert.assertEquals(props.get("feedNames"), feed.getName());
- Assert.assertEquals(props.get("feedInstancePaths"), "${coord:dataOut('output')}");
-
- // verify workflow params
- Assert.assertEquals(props.get("userWorkflowName"), "replication-policy");
- Assert.assertEquals(props.get("userWorkflowVersion"), "0.5");
- Assert.assertEquals(props.get("userWorkflowEngine"), "falcon");
-
- // verify default params
- Assert.assertEquals(props.get("queueName"), "default");
- Assert.assertEquals(props.get("jobPriority"), "NORMAL");
- Assert.assertEquals(props.get("maxMaps"), "5");
- Assert.assertEquals(props.get("mapBandwidthKB"), "102400");
-
- assertLibExtensions(coord, "replication");
- WORKFLOWAPP wf = getWorkflowapp(coord);
- assertWorkflowRetries(wf);
-
- Assert.assertFalse(Storage.TYPE.TABLE == FeedHelper.getStorageType(feed, trgCluster));
- }
-
- private String getWorkflowAppPath() {
- return "${nameNode}/projects/falcon/REPLICATION/" + srcCluster.getName();
- }
-
- private void assertWorkflowRetries(COORDINATORAPP coord) throws JAXBException, IOException {
- assertWorkflowRetries(getWorkflowapp(coord));
- }
-
- private void assertWorkflowRetries(WORKFLOWAPP wf) throws JAXBException, IOException {
- List<Object> actions = wf.getDecisionOrForkOrJoin();
- for (Object obj : actions) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
- ACTION action = (ACTION) obj;
- String actionName = action.getName();
- if (OozieWorkflowBuilder.FALCON_ACTIONS.contains(actionName)) {
- Assert.assertEquals(action.getRetryMax(), "3");
- Assert.assertEquals(action.getRetryInterval(), "1");
- }
- }
- }
-
- private void assertLibExtensions(COORDINATORAPP coord, String lifecycle) throws Exception {
- WORKFLOWAPP wf = getWorkflowapp(coord);
- List<Object> actions = wf.getDecisionOrForkOrJoin();
- for (Object obj : actions) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
- ACTION action = (ACTION) obj;
- List<String> files = null;
- if (action.getJava() != null) {
- files = action.getJava().getFile();
- } else if (action.getPig() != null) {
- files = action.getPig().getFile();
- } else if (action.getMapReduce() != null) {
- files = action.getMapReduce().getFile();
- }
- if (files != null) {
- Assert.assertTrue(files.get(files.size() - 1).endsWith("/projects/falcon/working/libext/FEED/"
- + lifecycle + "/ext.jar"));
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- private WORKFLOWAPP getWorkflowapp(COORDINATORAPP coord) throws JAXBException, IOException {
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- JAXBContext jaxbContext = JAXBContext.newInstance(WORKFLOWAPP.class);
- return ((JAXBElement<WORKFLOWAPP>) jaxbContext.createUnmarshaller().unmarshal(
- trgMiniDFS.getFileSystem().open(new Path(wfPath, "workflow.xml")))).getValue();
- }
-
- @Test
- public void testReplicationCoordsForFSStorageWithMultipleTargets() throws Exception {
- OozieFeedWorkflowBuilder builder = new OozieFeedWorkflowBuilder(fsReplFeed);
-
- List<COORDINATORAPP> alphaCoords = builder.getCoordinators(alphaTrgCluster, new Path("/alpha/falcon/"));
- final COORDINATORAPP alphaCoord = alphaCoords.get(0);
- Assert.assertEquals(alphaCoord.getStart(), "2012-10-01T12:05Z");
- Assert.assertEquals(alphaCoord.getEnd(), "2012-10-01T12:11Z");
-
- String pathsWithPartitions = getPathsWithPartitions(srcCluster, alphaTrgCluster, fsReplFeed);
- assertReplCoord(alphaCoord, fsReplFeed, alphaTrgCluster.getName(), pathsWithPartitions);
-
- List<COORDINATORAPP> betaCoords = builder.getCoordinators(betaTrgCluster, new Path("/beta/falcon/"));
- final COORDINATORAPP betaCoord = betaCoords.get(0);
- Assert.assertEquals(betaCoord.getStart(), "2012-10-01T12:10Z");
- Assert.assertEquals(betaCoord.getEnd(), "2012-10-01T12:26Z");
-
- pathsWithPartitions = getPathsWithPartitions(srcCluster, betaTrgCluster, fsReplFeed);
- assertReplCoord(betaCoord, fsReplFeed, betaTrgCluster.getName(), pathsWithPartitions);
- }
-
- private String getPathsWithPartitions(Cluster sourceCluster, Cluster targetCluster,
- Feed aFeed) throws FalconException {
- String srcPart = FeedHelper.normalizePartitionExpression(
- FeedHelper.getCluster(aFeed, sourceCluster.getName()).getPartition());
- srcPart = FeedHelper.evaluateClusterExp(sourceCluster, srcPart);
- String targetPart = FeedHelper.normalizePartitionExpression(
- FeedHelper.getCluster(aFeed, targetCluster.getName()).getPartition());
- targetPart = FeedHelper.evaluateClusterExp(targetCluster, targetPart);
-
- String pathsWithPartitions = "${coord:dataIn('input')}/"
- + FeedHelper.normalizePartitionExpression(srcPart, targetPart);
- String parts = pathsWithPartitions.replaceAll("//+", "/");
- parts = StringUtils.stripEnd(parts, "/");
- return parts;
- }
-
- private void assertReplCoord(COORDINATORAPP coord, Feed aFeed, String clusterName,
- String pathsWithPartitions) throws JAXBException, IOException {
- org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(aFeed, clusterName);
- Date startDate = feedCluster.getValidity().getStart();
- Assert.assertEquals(coord.getStart(), SchemaHelper.formatDateUTC(startDate));
-
- Date endDate = feedCluster.getValidity().getEnd();
- Assert.assertEquals(coord.getEnd(), SchemaHelper.formatDateUTC(endDate));
-
- WORKFLOWAPP workflow = getWorkflowapp(coord);
- assertWorkflowDefinition(fsReplFeed, workflow);
-
- List<Object> actions = workflow.getDecisionOrForkOrJoin();
- System.out.println("actions = " + actions);
-
- ACTION replicationActionNode = (ACTION) actions.get(4);
- Assert.assertEquals(replicationActionNode.getName(), "replication");
-
- JAVA replication = replicationActionNode.getJava();
- List<String> args = replication.getArg();
- Assert.assertEquals(args.size(), 13);
-
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- Assert.assertEquals(props.get("sourceRelativePaths"), pathsWithPartitions);
- Assert.assertEquals(props.get("sourceRelativePaths"), "${coord:dataIn('input')}/" + srcCluster.getColo());
- Assert.assertEquals(props.get("distcpSourcePaths"), "${coord:dataIn('input')}");
- Assert.assertEquals(props.get("distcpTargetPaths"), "${coord:dataOut('output')}");
- Assert.assertEquals(props.get("falconFeedStorageType"), Storage.TYPE.FILESYSTEM.name());
- Assert.assertEquals(props.get("maxMaps"), "33");
- Assert.assertEquals(props.get("mapBandwidthKB"), "2048");
- Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, aFeed));
- }
-
- public void assertWorkflowDefinition(Feed aFeed, WORKFLOWAPP parentWorkflow) {
- Assert.assertEquals(EntityUtil.getWorkflowName(Tag.REPLICATION, aFeed).toString(), parentWorkflow.getName());
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
- Assert.assertEquals("should-record", ((DECISION) decisionOrForkOrJoin.get(0)).getName());
- Assert.assertEquals("recordsize", ((ACTION) decisionOrForkOrJoin.get(1)).getName());
- Assert.assertEquals("replication-decision", ((DECISION) decisionOrForkOrJoin.get(2)).getName());
- Assert.assertEquals("table-export", ((ACTION) decisionOrForkOrJoin.get(3)).getName());
- Assert.assertEquals("replication", ((ACTION) decisionOrForkOrJoin.get(4)).getName());
- Assert.assertEquals("post-replication-decision", ((DECISION) decisionOrForkOrJoin.get(5)).getName());
- Assert.assertEquals("table-import", ((ACTION) decisionOrForkOrJoin.get(6)).getName());
- Assert.assertEquals("cleanup-table-staging-dir", ((ACTION) decisionOrForkOrJoin.get(7)).getName());
- Assert.assertEquals("succeeded-post-processing", ((ACTION) decisionOrForkOrJoin.get(8)).getName());
- Assert.assertEquals("failed-post-processing", ((ACTION) decisionOrForkOrJoin.get(9)).getName());
- }
-
- @DataProvider(name = "secureOptions")
- private Object[][] createOptions() {
- return new Object[][] {
- {"simple"},
- {"kerberos"},
- };
- }
-
- @Test (dataProvider = "secureOptions")
- public void testReplicationCoordsForTableStorage(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- OozieFeedWorkflowBuilder builder = new OozieFeedWorkflowBuilder(tableFeed);
- List<COORDINATORAPP> coords = builder.getCoordinators(
- trgCluster, new Path("/projects/falcon/"));
- COORDINATORAPP coord = coords.get(0);
-
- Assert.assertEquals("2010-01-01T00:40Z", coord.getStart());
- Assert.assertEquals(getWorkflowAppPath(),
- coord.getAction().getWorkflow().getAppPath());
- Assert.assertEquals("FALCON_FEED_REPLICATION_" + tableFeed.getName() + "_"
- + srcCluster.getName(), coord.getName());
- Assert.assertEquals("${coord:minutes(20)}", coord.getFrequency());
-
- SYNCDATASET inputDataset = (SYNCDATASET) coord.getDatasets()
- .getDatasetOrAsyncDataset().get(0);
- Assert.assertEquals("${coord:minutes(20)}", inputDataset.getFrequency());
- Assert.assertEquals("input-dataset", inputDataset.getName());
-
- String sourceRegistry = ClusterHelper.getInterface(srcCluster, Interfacetype.REGISTRY).getEndpoint();
- sourceRegistry = sourceRegistry.replace("thrift", "hcat");
- Assert.assertEquals(inputDataset.getUriTemplate(),
- sourceRegistry + "/source_db/source_clicks_table/ds=${YEAR}${MONTH}${DAY};region=${region}");
-
- SYNCDATASET outputDataset = (SYNCDATASET) coord.getDatasets()
- .getDatasetOrAsyncDataset().get(1);
- Assert.assertEquals(outputDataset.getFrequency(), "${coord:minutes(20)}");
- Assert.assertEquals("output-dataset", outputDataset.getName());
-
- String targetRegistry = ClusterHelper.getInterface(trgCluster, Interfacetype.REGISTRY).getEndpoint();
- targetRegistry = targetRegistry.replace("thrift", "hcat");
- Assert.assertEquals(outputDataset.getUriTemplate(),
- targetRegistry + "/target_db/target_clicks_table/ds=${YEAR}${MONTH}${DAY};region=${region}");
-
- String inEventName =coord.getInputEvents().getDataIn().get(0).getName();
- String inEventDataset =coord.getInputEvents().getDataIn().get(0).getDataset();
- String inEventInstance = coord.getInputEvents().getDataIn().get(0).getInstance().get(0);
- Assert.assertEquals("input", inEventName);
- Assert.assertEquals("input-dataset", inEventDataset);
- Assert.assertEquals("${now(0,-40)}", inEventInstance);
-
- String outEventInstance = coord.getOutputEvents().getDataOut().get(0).getInstance();
- Assert.assertEquals("${now(0,-40)}", outEventInstance);
-
- // assert FS staging area
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- final FileSystem fs = trgMiniDFS.getFileSystem();
- Assert.assertTrue(fs.exists(new Path(wfPath + "/scripts")));
- Assert.assertTrue(fs.exists(new Path(wfPath + "/scripts/falcon-table-export.hql")));
- Assert.assertTrue(fs.exists(new Path(wfPath + "/scripts/falcon-table-import.hql")));
-
- Assert.assertTrue(fs.exists(new Path(wfPath + "/conf")));
- Assert.assertTrue(fs.exists(new Path(wfPath + "/conf/falcon-source-hive-site.xml")));
- Assert.assertTrue(fs.exists(new Path(wfPath + "/conf/falcon-target-hive-site.xml")));
-
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- final CatalogStorage srcStorage = (CatalogStorage) FeedHelper.createStorage(srcCluster, tableFeed);
- final CatalogStorage trgStorage = (CatalogStorage) FeedHelper.createStorage(trgCluster, tableFeed);
-
- // verify the replication param that feed replicator depends on
- Assert.assertEquals(props.get("sourceRelativePaths"), "IGNORE");
-
- Assert.assertTrue(props.containsKey("distcpSourcePaths"));
- Assert.assertEquals(props.get("distcpSourcePaths"),
- FeedHelper.getStagingPath(srcCluster, tableFeed, srcStorage, Tag.REPLICATION,
- "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}" + "/" + trgCluster.getName()));
-
- Assert.assertTrue(props.containsKey("distcpTargetPaths"));
- Assert.assertEquals(props.get("distcpTargetPaths"),
- FeedHelper.getStagingPath(trgCluster, tableFeed, trgStorage, Tag.REPLICATION,
- "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}" + "/" + trgCluster.getName()));
-
- Assert.assertEquals(props.get("falconFeedStorageType"), Storage.TYPE.TABLE.name());
-
- // verify table props
- assertTableStorageProperties(srcCluster, srcStorage, props, "falconSource");
- assertTableStorageProperties(trgCluster, trgStorage, props, "falconTarget");
-
- // verify the late data params
- Assert.assertEquals(props.get("falconInputFeeds"), tableFeed.getName());
- Assert.assertEquals(props.get("falconInPaths"), "${coord:dataIn('input')}");
- Assert.assertEquals(props.get("falconInputFeedStorageTypes"), Storage.TYPE.TABLE.name());
- Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, tableFeed));
-
- // verify the post processing params
- Assert.assertEquals(props.get("feedNames"), tableFeed.getName());
- Assert.assertEquals(props.get("feedInstancePaths"), "${coord:dataOut('output')}");
-
- Assert.assertTrue(Storage.TYPE.TABLE == FeedHelper.getStorageType(tableFeed, trgCluster));
- assertReplicationHCatCredentials(getWorkflowapp(coord),
- coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", ""));
- }
-
- private void assertReplicationHCatCredentials(WORKFLOWAPP wf, String wfPath) throws IOException {
- FileSystem fs = trgMiniDFS.getFileSystem();
-
- Path hiveConfPath = new Path(wfPath, "conf/falcon-source-hive-site.xml");
- Assert.assertTrue(fs.exists(hiveConfPath));
-
- hiveConfPath = new Path(wfPath, "conf/falcon-target-hive-site.xml");
- Assert.assertTrue(fs.exists(hiveConfPath));
-
- boolean isSecurityEnabled = SecurityUtil.isSecurityEnabled();
- if (isSecurityEnabled) {
- Assert.assertNotNull(wf.getCredentials());
- Assert.assertEquals(2, wf.getCredentials().getCredential().size());
- }
-
- List<Object> actions = wf.getDecisionOrForkOrJoin();
- for (Object obj : actions) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
- ACTION action = (ACTION) obj;
- String actionName = action.getName();
-
- if (!isSecurityEnabled) {
- Assert.assertNull(action.getCred());
- }
-
- if ("recordsize".equals(actionName)) {
- Assert.assertEquals(action.getJava().getJobXml(), "${wf:appPath()}/conf/falcon-source-hive-site.xml");
- if (isSecurityEnabled) {
- Assert.assertNotNull(action.getCred());
- Assert.assertEquals(action.getCred(), "falconSourceHiveAuth");
- }
- } else if ("table-export".equals(actionName) && isSecurityEnabled) {
- Assert.assertNotNull(action.getCred());
- Assert.assertEquals(action.getCred(), "falconSourceHiveAuth");
- } else if ("table-import".equals(actionName) && isSecurityEnabled) {
- Assert.assertNotNull(action.getCred());
- Assert.assertEquals(action.getCred(), "falconTargetHiveAuth");
- }
- }
- }
-
- private void assertTableStorageProperties(Cluster cluster, CatalogStorage tableStorage,
- Map<String, String> props, String prefix) {
- Assert.assertEquals(props.get(prefix + "NameNode"), ClusterHelper.getStorageUrl(cluster));
- Assert.assertEquals(props.get(prefix + "JobTracker"), ClusterHelper.getMREndPoint(cluster));
- Assert.assertEquals(props.get(prefix + "HcatNode"), tableStorage.getCatalogUrl());
-
- Assert.assertEquals(props.get(prefix + "Database"), tableStorage.getDatabase());
- Assert.assertEquals(props.get(prefix + "Table"), tableStorage.getTable());
- Assert.assertEquals(props.get(prefix + "Partition"), "(${coord:dataInPartitions('input', 'hive-export')})");
- }
-
- @Test
- public void testRetentionCoords() throws FalconException, JAXBException, IOException {
- org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, srcCluster.getName());
- final Calendar instance = Calendar.getInstance();
- instance.roll(Calendar.YEAR, 1);
- cluster.getValidity().setEnd(instance.getTime());
-
- OozieFeedWorkflowBuilder builder = new OozieFeedWorkflowBuilder(feed);
- List<COORDINATORAPP> coords = builder.getCoordinators(srcCluster, new Path("/projects/falcon/"));
- COORDINATORAPP coord = coords.get(0);
-
- Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/RETENTION");
- Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + feed.getName());
- Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}");
-
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- String feedDataPath = props.get("feedDataPath");
- String storageType = props.get("falconFeedStorageType");
-
- // verify the param that feed evictor depends on
- Assert.assertEquals(storageType, Storage.TYPE.FILESYSTEM.name());
-
- final Storage storage = FeedHelper.createStorage(cluster, feed);
- if (feedDataPath != null) {
- Assert.assertEquals(feedDataPath, storage.getUriTemplate()
- .replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX));
- }
-
- if (storageType != null) {
- Assert.assertEquals(storageType, storage.getType().name());
- }
-
- // verify the post processing params
- Assert.assertEquals(props.get("feedNames"), feed.getName());
- Assert.assertEquals(props.get("feedInstancePaths"), "IGNORE");
- Assert.assertEquals(props.get("logDir"), getLogPath(srcCluster, feed));
-
- assertWorkflowRetries(coord);
- }
-
- @Test (dataProvider = "secureOptions")
- public void testRetentionCoordsForTable(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(tableFeed, trgCluster.getName());
- final Calendar instance = Calendar.getInstance();
- instance.roll(Calendar.YEAR, 1);
- cluster.getValidity().setEnd(instance.getTime());
-
- OozieFeedWorkflowBuilder builder = new OozieFeedWorkflowBuilder(tableFeed);
- List<COORDINATORAPP> coords = builder.getCoordinators(trgCluster, new Path("/projects/falcon/"));
- COORDINATORAPP coord = coords.get(0);
-
- Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/RETENTION");
- Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + tableFeed.getName());
- Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}");
-
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- String feedDataPath = props.get("feedDataPath");
- String storageType = props.get("falconFeedStorageType");
-
- // verify the param that feed evictor depends on
- Assert.assertEquals(storageType, Storage.TYPE.TABLE.name());
-
- final Storage storage = FeedHelper.createStorage(cluster, tableFeed);
- if (feedDataPath != null) {
- Assert.assertEquals(feedDataPath, storage.getUriTemplate()
- .replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX));
- }
-
- if (storageType != null) {
- Assert.assertEquals(storageType, storage.getType().name());
- }
-
- // verify the post processing params
- Assert.assertEquals(props.get("feedNames"), tableFeed.getName());
- Assert.assertEquals(props.get("feedInstancePaths"), "IGNORE");
- Assert.assertEquals(props.get("logDir"), getLogPath(trgCluster, tableFeed));
-
- assertWorkflowRetries(coord);
-
- Assert.assertTrue(Storage.TYPE.TABLE == FeedHelper.getStorageType(tableFeed, trgCluster));
- assertHCatCredentials(getWorkflowapp(coord),
- coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", ""));
- }
-
- private void assertHCatCredentials(WORKFLOWAPP wf, String wfPath) throws IOException {
- Path hiveConfPath = new Path(wfPath, "conf/hive-site.xml");
- FileSystem fs = trgMiniDFS.getFileSystem();
- Assert.assertTrue(fs.exists(hiveConfPath));
-
- if (SecurityUtil.isSecurityEnabled()) {
- Assert.assertNotNull(wf.getCredentials());
- Assert.assertEquals(1, wf.getCredentials().getCredential().size());
- }
-
- List<Object> actions = wf.getDecisionOrForkOrJoin();
- for (Object obj : actions) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
- ACTION action = (ACTION) obj;
- String actionName = action.getName();
-
- if ("eviction".equals(actionName)) {
- Assert.assertEquals(action.getJava().getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
- if (SecurityUtil.isSecurityEnabled()) {
- Assert.assertNotNull(action.getCred());
- Assert.assertEquals(action.getCred(), "falconHiveAuth");
- }
- }
- }
- }
-
- private String getLogPath(Cluster aCluster, Feed aFeed) {
- Path logPath = EntityUtil.getLogPath(aCluster, aFeed);
- return (logPath.toUri().getScheme() == null ? "${nameNode}" : "") + logPath;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/feed.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/feed.xml b/feed/src/test/resources/feed.xml
deleted file mode 100644
index 4da222e..0000000
--- a/feed/src/test/resources/feed.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<feed description="clicks log" name="raw-logs" xmlns="uri:falcon:feed:0.1"
- >
-
- <groups>online,bi</groups>
-
- <frequency>minutes(20)</frequency>
- <timezone>UTC</timezone>
-
- <late-arrival cut-off="minutes(3)"/>
- <clusters>
- <cluster name="corp1" type="source" delay="minutes(40)">
- <validity start="2010-01-01T00:00Z" end="2020-01-01T02:00Z"/>
- <retention limit="minutes(5)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- <cluster name="corp2" type="target">
- <validity start="2010-01-01T00:00Z" end="2020-01-01T02:00Z"/>
- <retention limit="minutes(7)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <locations>
- <location type="data"
- path="/examples/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
- <location type="stats" path="/projects/falcon/clicksStats"/>
- <location type="meta" path="/projects/falcon/clicksMetaData"/>
- </locations>
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-
- <properties>
- <property name="field3" value="value3"/>
- <property name="field2" value="value2"/>
-
- <property name="field4" value="value2"/>
- </properties>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/fs-replication-feed.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/fs-replication-feed.xml b/feed/src/test/resources/fs-replication-feed.xml
deleted file mode 100644
index bada507..0000000
--- a/feed/src/test/resources/fs-replication-feed.xml
+++ /dev/null
@@ -1,68 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<feed description="billing RC File" name="replication-test" xmlns="uri:falcon:feed:0.1">
- <partitions>
- <partition name="colo"/>
- <partition name="eventTime"/>
- <partition name="impressionHour"/>
- <partition name="pricingModel"/>
- </partitions>
-
- <groups>online,bi</groups>
-
- <frequency>minutes(5)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="minutes(1)"/>
-
- <clusters>
- <cluster partition="${cluster.colo}" type="source" name="corp1">
- <validity end="2099-01-01T00:00Z" start="2012-10-01T12:00Z"/>
- <retention action="delete" limit="days(10000)"/>
- </cluster>
- <cluster type="target" name="alpha">
- <validity end="2012-10-01T12:11Z" start="2012-10-01T12:05Z"/>
- <retention action="delete" limit="days(10000)"/>
- <locations>
- <location path="/localDC/rc/billing/ua1/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/" type="data"/>
- </locations>
- </cluster>
- <cluster type="target" name="beta">
- <validity end="2012-10-01T12:26Z" start="2012-10-01T12:10Z"/>
- <retention action="delete" limit="days(10000)"/>
- <locations>
- <location path="/localDC/rc/billing/ua2/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/" type="data"/>
- </locations>
- </cluster>
- </clusters>
-
- <locations>
- <location
- path="/localDC/rc/billing/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/"
- type="data"/>
- <location path="/data/regression/fetlrc/billing/stats" type="stats"/>
- <location path="/data/regression/fetlrc/billing/metadata"
- type="meta"/>
- </locations>
-
- <ACL permission="0x755" group="group" owner="fetl"/>
- <schema provider="protobuf" location="/databus/streams_local/click_rr/schema/"/>
- <properties>
- <property name="maxMaps" value="33" />
- <property name="mapBandwidthKB" value="2048" />
- </properties>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/src-cluster.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/src-cluster.xml b/feed/src/test/resources/src-cluster.xml
deleted file mode 100644
index 730f8d2..0000000
--- a/feed/src/test/resources/src-cluster.xml
+++ /dev/null
@@ -1,40 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<cluster colo="gs1" description="" name="corp1" xmlns="uri:falcon:cluster:0.1"
- >
- <interfaces>
- <interface type="readonly" endpoint="http://localhost:50070"
- version="0.20.2"/>
- <interface type="write" endpoint="hdfs://localhost:8020"
- version="0.20.2"/>
- <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
- <interface type="workflow" endpoint="http://localhost:11000/oozie/"
- version="3.1"/>
- <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
- version="5.1.6"/>
- <interface type="registry" endpoint="thrift://localhost:49093" version="1"/>
- </interfaces>
- <locations>
- <location name="temp" path="/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
- <location name="staging" path="/projects/falcon/staging"/>
- </locations>
- <properties>
- <property name="separator" value="-"/>
- </properties>
-</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/table-replication-feed.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/table-replication-feed.xml b/feed/src/test/resources/table-replication-feed.xml
deleted file mode 100644
index 4c610f6..0000000
--- a/feed/src/test/resources/table-replication-feed.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<feed description="clicks log" name="raw-logs-table" xmlns="uri:falcon:feed:0.1">
-
- <frequency>minutes(20)</frequency>
- <timezone>UTC</timezone>
-
- <clusters>
- <cluster name="corp1" type="source" delay="minutes(40)">
- <validity start="2010-01-01T00:00Z" end="2010-01-01T02:00Z"/>
- <retention limit="minutes(5)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- <cluster name="corp2" type="target">
- <validity start="2010-01-01T00:00Z" end="2010-01-01T02:00Z"/>
- <retention limit="minutes(7)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- <table uri="catalog:target_db:target_clicks_table#ds=${YEAR}${MONTH}${DAY};region=${region}" />
- </cluster>
- </clusters>
-
- <table uri="catalog:source_db:source_clicks_table#ds=${YEAR}${MONTH}${DAY};region=${region}" />
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/trg-cluster-alpha.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/trg-cluster-alpha.xml b/feed/src/test/resources/trg-cluster-alpha.xml
deleted file mode 100644
index 1fb07cb..0000000
--- a/feed/src/test/resources/trg-cluster-alpha.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<cluster colo="ua1" description="" name="alpha" xmlns="uri:falcon:cluster:0.1">
- <interfaces>
- <interface type="readonly" endpoint="http://localhost:50070"
- version="0.20.2"/>
- <interface type="write" endpoint="hdfs://localhost:8020"
- version="0.20.2"/>
- <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
- <interface type="workflow" endpoint="http://localhost:11000/oozie/"
- version="3.1"/>
- <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
- version="5.1.6"/>
- <interface type="registry" endpoint="thrift://localhost:59093" version="1"/>
- </interfaces>
- <locations>
- <location name="temp" path="/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
- <location name="staging" path="/projects/falcon/staging2"/>
- </locations>
- <properties>
- <property name="separator" value="-"/>
- </properties>
-</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/trg-cluster-beta.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/trg-cluster-beta.xml b/feed/src/test/resources/trg-cluster-beta.xml
deleted file mode 100644
index 0bf0bcd..0000000
--- a/feed/src/test/resources/trg-cluster-beta.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<cluster colo="ua2" description="" name="beta" xmlns="uri:falcon:cluster:0.1">
- <interfaces>
- <interface type="readonly" endpoint="http://localhost:50070"
- version="0.20.2"/>
- <interface type="write" endpoint="hdfs://localhost:8020"
- version="0.20.2"/>
- <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
- <interface type="workflow" endpoint="http://localhost:11000/oozie/"
- version="3.1"/>
- <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
- version="5.1.6"/>
- <interface type="registry" endpoint="thrift://localhost:59093" version="1"/>
- </interfaces>
- <locations>
- <location name="temp" path="/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
- <location name="staging" path="/projects/falcon/staging2"/>
- </locations>
- <properties>
- <property name="separator" value="-"/>
- </properties>
-</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/test/resources/trg-cluster.xml
----------------------------------------------------------------------
diff --git a/feed/src/test/resources/trg-cluster.xml b/feed/src/test/resources/trg-cluster.xml
deleted file mode 100644
index 8260fda..0000000
--- a/feed/src/test/resources/trg-cluster.xml
+++ /dev/null
@@ -1,40 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<cluster colo="gs2" description="" name="corp2" xmlns="uri:falcon:cluster:0.1"
- >
- <interfaces>
- <interface type="readonly" endpoint="http://localhost:50070"
- version="0.20.2"/>
- <interface type="write" endpoint="hdfs://localhost:8020"
- version="0.20.2"/>
- <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
- <interface type="workflow" endpoint="http://localhost:11000/oozie/"
- version="3.1"/>
- <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
- version="5.1.6"/>
- <interface type="registry" endpoint="thrift://localhost:59093" version="1"/>
- </interfaces>
- <locations>
- <location name="temp" path="/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
- <location name="staging" path="/projects/falcon/staging2"/>
- </locations>
- <properties>
- <property name="separator" value="-"/>
- </properties>
-</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/OozieBundleBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/OozieBundleBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/OozieBundleBuilder.java
new file mode 100644
index 0000000..2018db2
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/OozieBundleBuilder.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.falcon.oozie.bundle.BUNDLEAPP;
+import org.apache.falcon.oozie.bundle.COORDINATOR;
+import org.apache.falcon.security.CurrentUser;
+import org.apache.falcon.util.OozieUtils;
+import org.apache.falcon.workflow.engine.OozieWorkflowEngine;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.oozie.client.OozieClient;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Base class for building oozie bundle - bundle is the entity that falcon tracks in oozie.
+ * @param <T>
+ */
+public abstract class OozieBundleBuilder<T extends Entity> extends OozieEntityBuilder<T> {
+ public static final Logger LOG = LoggerFactory.getLogger(OozieBundleBuilder.class);
+
+ public OozieBundleBuilder(T entity) {
+ super(entity);
+ }
+
+ @Override public Properties build(Cluster cluster, Path buildPath) throws FalconException {
+ String clusterName = cluster.getName();
+ if (EntityUtil.getStartTime(entity, clusterName).compareTo(EntityUtil.getEndTime(entity, clusterName)) >= 0) {
+ LOG.info("process validity start >= end for cluster {}. Skipping schedule", clusterName);
+ return null;
+ }
+
+ List<Properties> coords = doBuild(cluster, buildPath);
+ if (coords == null || coords.isEmpty()) {
+ return null;
+ }
+
+ BUNDLEAPP bundle = new BUNDLEAPP();
+ bundle.setName(EntityUtil.getWorkflowName(entity).toString());
+ // all the properties are set prior to bundle and coordinators creation
+
+ createLogsDir(cluster, buildPath); //create logs dir
+
+ for (Properties coordProps : coords) {
+ // add the coordinator to the bundle
+ COORDINATOR coord = new COORDINATOR();
+ String coordPath = coordProps.getProperty(OozieEntityBuilder.ENTITY_PATH);
+ coord.setName(coordProps.getProperty(OozieEntityBuilder.ENTITY_NAME));
+ coord.setAppPath(getStoragePath(coordPath));
+ bundle.getCoordinator().add(coord);
+ }
+
+ marshal(cluster, bundle, buildPath); // write the bundle
+ Properties properties = createAppProperties(cluster, buildPath);
+
+ //Add libpath
+ Path libPath = getLibPath(cluster, buildPath);
+ if (libPath != null) {
+ properties.put(OozieClient.LIBPATH, getStoragePath(libPath));
+ }
+
+ properties.putAll(getAdditionalProperties(cluster));
+ return properties;
+ }
+
+ protected Properties getAdditionalProperties(Cluster cluster) throws FalconException {
+ return new Properties();
+ }
+
+ protected abstract Path getLibPath(Cluster cluster, Path buildPath) throws FalconException;
+
+ protected Properties createAppProperties(Cluster cluster, Path buildPath) throws FalconException {
+ Properties properties = getEntityProperties(cluster);
+ properties.setProperty(OozieWorkflowEngine.NAME_NODE, ClusterHelper.getStorageUrl(cluster));
+ properties.setProperty(OozieWorkflowEngine.JOB_TRACKER, ClusterHelper.getMREndPoint(cluster));
+ properties.setProperty(OozieClient.BUNDLE_APP_PATH, getStoragePath(buildPath));
+ properties.setProperty("colo.name", cluster.getColo());
+
+ properties.setProperty(OozieClient.USER_NAME, CurrentUser.getUser());
+ properties.setProperty(OozieClient.USE_SYSTEM_LIBPATH, "true");
+ properties.setProperty("falcon.libpath", ClusterHelper.getLocation(cluster, "working") + "/lib");
+
+ if (isTableStorageType(cluster)) {
+ properties.putAll(getHiveCredentials(cluster));
+ }
+
+ LOG.info("Cluster: {}, PROPS: {}", cluster.getName(), properties);
+ return properties;
+ }
+
+ private void createLogsDir(Cluster cluster, Path buildPath) throws FalconException {
+ try {
+ FileSystem fs = HadoopClientFactory.get().createFileSystem(buildPath.toUri(),
+ ClusterHelper.getConfiguration(cluster));
+ Path logsDir = new Path(buildPath.getParent(), "logs");
+ if (!fs.mkdirs(logsDir)) {
+ throw new FalconException("Failed to create " + logsDir);
+ }
+
+ // logs are copied with in oozie as the user in Post Processing and hence 777 permissions
+ FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
+ fs.setPermission(logsDir, permission);
+ } catch (IOException e) {
+ throw new FalconException(e);
+ }
+ }
+
+ protected void marshal(Cluster cluster, BUNDLEAPP bundle, Path outPath) throws FalconException {
+ marshal(cluster, new org.apache.falcon.oozie.bundle.ObjectFactory().createBundleApp(bundle),
+ OozieUtils.BUNDLE_JAXB_CONTEXT, new Path(outPath, "bundle.xml"));
+ }
+
+ protected abstract List<Properties> doBuild(Cluster cluster, Path bundlePath) throws FalconException;
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/OozieCoordinatorBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/OozieCoordinatorBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/OozieCoordinatorBuilder.java
new file mode 100644
index 0000000..5a29683
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/OozieCoordinatorBuilder.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.ExternalId;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.falcon.oozie.coordinator.ObjectFactory;
+import org.apache.falcon.oozie.feed.FeedReplicationCoordinatorBuilder;
+import org.apache.falcon.oozie.feed.FeedRetentionCoordinatorBuilder;
+import org.apache.falcon.oozie.process.ProcessExecutionCoordinatorBuilder;
+import org.apache.falcon.util.OozieUtils;
+import org.apache.falcon.util.StartupProperties;
+import org.apache.hadoop.fs.Path;
+import org.apache.oozie.client.OozieClient;
+
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Unmarshaller;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+/**
+ * Base class for building oozie coordinator.
+ * @param <T>
+ */
+public abstract class OozieCoordinatorBuilder<T extends Entity> extends OozieEntityBuilder<T> {
+ protected static final String NOMINAL_TIME_EL = "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}";
+ protected static final String ACTUAL_TIME_EL = "${coord:formatTime(coord:actualTime(), 'yyyy-MM-dd-HH-mm')}";
+ protected static final Long DEFAULT_BROKER_MSG_TTL = 3 * 24 * 60L;
+ protected static final String MR_QUEUE_NAME = "queueName";
+ protected static final String MR_JOB_PRIORITY = "jobPriority";
+
+ protected static final String IGNORE = "IGNORE";
+ protected final Tag lifecycle;
+
+ public OozieCoordinatorBuilder(T entity, Tag tag) {
+ super(entity);
+ this.lifecycle = tag;
+ }
+
+ public static final OozieCoordinatorBuilder get(Entity entity, Tag tag) {
+ switch(entity.getEntityType()) {
+ case FEED:
+ switch (tag) {
+ case RETENTION:
+ return new FeedRetentionCoordinatorBuilder((Feed)entity);
+
+ case REPLICATION:
+ return new FeedReplicationCoordinatorBuilder((Feed)entity);
+
+ default:
+ throw new IllegalArgumentException("Unhandled type " + entity.getEntityType() + ", lifecycle " + tag);
+ }
+
+ case PROCESS:
+ return new ProcessExecutionCoordinatorBuilder((org.apache.falcon.entity.v0.process.Process) entity);
+
+ default:
+ break;
+ }
+
+ throw new IllegalArgumentException("Unhandled type " + entity.getEntityType() + ", lifecycle " + tag);
+ }
+
+ protected Path getBuildPath(Path buildPath) {
+ return new Path(buildPath, lifecycle.name());
+ }
+
+ protected String getEntityName() {
+ return EntityUtil.getWorkflowName(lifecycle, entity).toString();
+ }
+
+ protected void marshal(Cluster cluster, COORDINATORAPP coord, Path outPath) throws FalconException {
+ marshal(cluster, new ObjectFactory().createCoordinatorApp(coord),
+ OozieUtils.COORD_JAXB_CONTEXT, new Path(outPath, "coordinator.xml"));
+ }
+
+ protected Properties createCoordDefaultConfiguration(Cluster cluster, String coordName) throws FalconException {
+ Properties props = new Properties();
+ props.put(ARG.entityName.getPropName(), entity.getName());
+ props.put(ARG.nominalTime.getPropName(), NOMINAL_TIME_EL);
+ props.put(ARG.timeStamp.getPropName(), ACTUAL_TIME_EL);
+ props.put("userBrokerUrl", ClusterHelper.getMessageBrokerUrl(cluster));
+ props.put("userBrokerImplClass", ClusterHelper.getMessageBrokerImplClass(cluster));
+ String falconBrokerUrl = StartupProperties.get().getProperty(ARG.brokerUrl.getPropName(),
+ "tcp://localhost:61616?daemon=true");
+ props.put(ARG.brokerUrl.getPropName(), falconBrokerUrl);
+ String falconBrokerImplClass = StartupProperties.get().getProperty(ARG.brokerImplClass.getPropName(),
+ ClusterHelper.DEFAULT_BROKER_IMPL_CLASS);
+ props.put(ARG.brokerImplClass.getPropName(), falconBrokerImplClass);
+ String jmsMessageTTL = StartupProperties.get().getProperty("broker.ttlInMins",
+ DEFAULT_BROKER_MSG_TTL.toString());
+ props.put(ARG.brokerTTL.getPropName(), jmsMessageTTL);
+ props.put(ARG.entityType.getPropName(), entity.getEntityType().name());
+ props.put("logDir", getStoragePath(new Path(EntityUtil.getBaseStagingPath(cluster, entity), "logs")));
+ props.put(OozieClient.EXTERNAL_ID,
+ new ExternalId(entity.getName(), EntityUtil.getWorkflowNameTag(coordName, entity),
+ "${coord:nominalTime()}").getId());
+ props.put("workflowEngineUrl", ClusterHelper.getOozieUrl(cluster));
+
+ if (EntityUtil.getLateProcess(entity) == null
+ || EntityUtil.getLateProcess(entity).getLateInputs() == null
+ || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
+ props.put("shouldRecord", "false");
+ } else {
+ props.put("shouldRecord", "true");
+ }
+
+ props.put("entityName", entity.getName());
+ props.put("entityType", entity.getEntityType().name().toLowerCase());
+ props.put(ARG.cluster.getPropName(), cluster.getName());
+
+ props.put(MR_QUEUE_NAME, "default");
+ props.put(MR_JOB_PRIORITY, "NORMAL");
+ //props in entity override the set props.
+ props.putAll(getEntityProperties(entity));
+ return props;
+ }
+
+ protected org.apache.falcon.oozie.coordinator.CONFIGURATION getConfig(Properties props) {
+ org.apache.falcon.oozie.coordinator.CONFIGURATION conf
+ = new org.apache.falcon.oozie.coordinator.CONFIGURATION();
+ for (Entry<Object, Object> prop : props.entrySet()) {
+ org.apache.falcon.oozie.coordinator.CONFIGURATION.Property confProp
+ = new org.apache.falcon.oozie.coordinator.CONFIGURATION.Property();
+ confProp.setName((String) prop.getKey());
+ confProp.setValue((String) prop.getValue());
+ conf.getProperty().add(confProp);
+ }
+ return conf;
+ }
+
+ public final Properties build(Cluster cluster, Path buildPath) throws FalconException {
+ throw new IllegalStateException("Not implemented for coordinator!");
+ }
+
+ public abstract List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException;
+
+ protected COORDINATORAPP getCoordinatorTemplate(String template) throws FalconException {
+ InputStream resourceAsStream = null;
+ try {
+ resourceAsStream = OozieCoordinatorBuilder.class.getResourceAsStream(template);
+ Unmarshaller unmarshaller = OozieUtils.COORD_JAXB_CONTEXT.createUnmarshaller();
+ @SuppressWarnings("unchecked") JAXBElement<COORDINATORAPP> jaxbElement = (JAXBElement<COORDINATORAPP>)
+ unmarshaller.unmarshal(resourceAsStream);
+ return jaxbElement.getValue();
+ } catch (JAXBException e) {
+ throw new FalconException(e);
+ } finally {
+ IOUtils.closeQuietly(resourceAsStream);
+ }
+ }
+
+}
[6/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionCoordinatorBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionCoordinatorBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionCoordinatorBuilder.java
new file mode 100644
index 0000000..c87bc86
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionCoordinatorBuilder.java
@@ -0,0 +1,336 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.Frequency;
+import org.apache.falcon.entity.v0.SchemaHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.entity.v0.process.Input;
+import org.apache.falcon.entity.v0.process.Output;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.entity.v0.process.Workflow;
+import org.apache.falcon.expression.ExpressionHelper;
+import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
+import org.apache.falcon.oozie.OozieCoordinatorBuilder;
+import org.apache.falcon.oozie.OozieEntityBuilder;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.coordinator.CONTROLS;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.falcon.oozie.coordinator.DATAIN;
+import org.apache.falcon.oozie.coordinator.DATAOUT;
+import org.apache.falcon.oozie.coordinator.DATASETS;
+import org.apache.falcon.oozie.coordinator.INPUTEVENTS;
+import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
+import org.apache.falcon.oozie.coordinator.SYNCDATASET;
+import org.apache.falcon.oozie.coordinator.WORKFLOW;
+import org.apache.hadoop.fs.Path;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Builds oozie coordinator for process.
+ */
+public class ProcessExecutionCoordinatorBuilder extends OozieCoordinatorBuilder<Process> {
+ private static final int THIRTY_MINUTES = 30 * 60 * 1000;
+
+ public ProcessExecutionCoordinatorBuilder(Process entity) {
+ super(entity, Tag.DEFAULT);
+ }
+
+ @Override public List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException {
+ String coordName = getEntityName();
+ Path coordPath = getBuildPath(buildPath);
+ copySharedLibs(cluster, new Path(coordPath, "lib"));
+
+ COORDINATORAPP coord = new COORDINATORAPP();
+ // coord attributes
+ initializeCoordAttributes(cluster, coord, coordName);
+
+ CONTROLS controls = initializeControls(); // controls
+ coord.setControls(controls);
+
+ // Configuration
+ Properties props = createCoordDefaultConfiguration(cluster, coordName);
+
+ initializeInputPaths(cluster, coord, props); // inputs
+ initializeOutputPaths(cluster, coord, props); // outputs
+
+ Workflow processWorkflow = entity.getWorkflow();
+ propagateUserWorkflowProperties(processWorkflow, props);
+
+ // create parent wf
+ Properties wfProps = OozieOrchestrationWorkflowBuilder.get(entity, Tag.DEFAULT).build(cluster, coordPath);
+
+ WORKFLOW wf = new WORKFLOW();
+ wf.setAppPath(getStoragePath(wfProps.getProperty(OozieEntityBuilder.ENTITY_PATH)));
+ wf.setConfiguration(getConfig(props));
+
+ // set coord action to parent wf
+ org.apache.falcon.oozie.coordinator.ACTION action = new org.apache.falcon.oozie.coordinator.ACTION();
+ action.setWorkflow(wf);
+ coord.setAction(action);
+
+ marshal(cluster, coord, coordPath);
+ return Arrays.asList(getProperties(coordPath, coordName));
+ }
+
+ private void initializeCoordAttributes(Cluster cluster, COORDINATORAPP coord, String coordName) {
+ coord.setName(coordName);
+ org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(entity,
+ cluster.getName());
+ coord.setStart(SchemaHelper.formatDateUTC(processCluster.getValidity().getStart()));
+ coord.setEnd(SchemaHelper.formatDateUTC(processCluster.getValidity().getEnd()));
+ coord.setTimezone(entity.getTimezone().getID());
+ coord.setFrequency("${coord:" + entity.getFrequency().toString() + "}");
+ }
+
+ private CONTROLS initializeControls()
+ throws FalconException {
+ CONTROLS controls = new CONTROLS();
+ controls.setConcurrency(String.valueOf(entity.getParallel()));
+ controls.setExecution(entity.getOrder().name());
+
+ Frequency timeout = entity.getTimeout();
+ long frequencyInMillis = ExpressionHelper.get().evaluate(entity.getFrequency().toString(), Long.class);
+ long timeoutInMillis;
+ if (timeout != null) {
+ timeoutInMillis = ExpressionHelper.get().
+ evaluate(entity.getTimeout().toString(), Long.class);
+ } else {
+ timeoutInMillis = frequencyInMillis * 6;
+ if (timeoutInMillis < THIRTY_MINUTES) {
+ timeoutInMillis = THIRTY_MINUTES;
+ }
+ }
+ controls.setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));
+
+ if (timeoutInMillis / frequencyInMillis * 2 > 0) {
+ controls.setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
+ }
+
+ return controls;
+ }
+
+ private void initializeInputPaths(Cluster cluster, COORDINATORAPP coord, Properties props) throws FalconException {
+ if (entity.getInputs() == null) {
+ props.put("falconInputFeeds", "NONE");
+ props.put("falconInPaths", IGNORE);
+ return;
+ }
+
+ List<String> inputFeeds = new ArrayList<String>();
+ List<String> inputPaths = new ArrayList<String>();
+ List<String> inputFeedStorageTypes = new ArrayList<String>();
+ for (Input input : entity.getInputs().getInputs()) {
+ Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
+ Storage storage = FeedHelper.createStorage(cluster, feed);
+
+ if (!input.isOptional()) {
+ if (coord.getDatasets() == null) {
+ coord.setDatasets(new DATASETS());
+ }
+ if (coord.getInputEvents() == null) {
+ coord.setInputEvents(new INPUTEVENTS());
+ }
+
+ SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, input.getName(), LocationType.DATA);
+ coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);
+
+ DATAIN datain = createDataIn(input);
+ coord.getInputEvents().getDataIn().add(datain);
+ }
+
+ String inputExpr = null;
+ if (storage.getType() == Storage.TYPE.FILESYSTEM) {
+ inputExpr = getELExpression("dataIn('" + input.getName() + "', '" + input.getPartition() + "')");
+ props.put(input.getName(), inputExpr);
+ } else if (storage.getType() == Storage.TYPE.TABLE) {
+ inputExpr = "${coord:dataIn('" + input.getName() + "')}";
+ propagateCatalogTableProperties(input, (CatalogStorage) storage, props);
+ }
+
+ inputFeeds.add(feed.getName());
+ inputPaths.add(inputExpr);
+ inputFeedStorageTypes.add(storage.getType().name());
+ }
+
+ propagateLateDataProperties(inputFeeds, inputPaths, inputFeedStorageTypes, props);
+ }
+
+ private void propagateLateDataProperties(List<String> inputFeeds, List<String> inputPaths,
+ List<String> inputFeedStorageTypes, Properties props) {
+ // populate late data handler - should-record action
+ props.put("falconInputFeeds", StringUtils.join(inputFeeds, '#'));
+ props.put("falconInPaths", StringUtils.join(inputPaths, '#'));
+
+ // storage type for each corresponding feed sent as a param to LateDataHandler
+ // needed to compute usage based on storage type in LateDataHandler
+ props.put("falconInputFeedStorageTypes", StringUtils.join(inputFeedStorageTypes, '#'));
+ }
+
+ private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage,
+ String datasetName, LocationType locationType) throws FalconException {
+ SYNCDATASET syncdataset = new SYNCDATASET();
+ syncdataset.setName(datasetName);
+ syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");
+
+ String uriTemplate = storage.getUriTemplate(locationType);
+ if (storage.getType() == Storage.TYPE.TABLE) {
+ uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
+ }
+ syncdataset.setUriTemplate(uriTemplate);
+
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
+ syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()));
+ syncdataset.setTimezone(feed.getTimezone().getID());
+
+ if (feed.getAvailabilityFlag() == null) {
+ syncdataset.setDoneFlag("");
+ } else {
+ syncdataset.setDoneFlag(feed.getAvailabilityFlag());
+ }
+
+ return syncdataset;
+ }
+
+ private DATAIN createDataIn(Input input) {
+ DATAIN datain = new DATAIN();
+ datain.setName(input.getName());
+ datain.setDataset(input.getName());
+ datain.setStartInstance(getELExpression(input.getStart()));
+ datain.setEndInstance(getELExpression(input.getEnd()));
+ return datain;
+ }
+
+ private String getELExpression(String expr) {
+ if (expr != null) {
+ expr = "${" + expr + "}";
+ }
+ return expr;
+ }
+
+ private void initializeOutputPaths(Cluster cluster, COORDINATORAPP coord, Properties props) throws FalconException {
+ if (entity.getOutputs() == null) {
+ props.put(ARG.feedNames.getPropName(), "NONE");
+ props.put(ARG.feedInstancePaths.getPropName(), IGNORE);
+ return;
+ }
+
+ if (coord.getDatasets() == null) {
+ coord.setDatasets(new DATASETS());
+ }
+
+ if (coord.getOutputEvents() == null) {
+ coord.setOutputEvents(new OUTPUTEVENTS());
+ }
+
+ List<String> outputFeeds = new ArrayList<String>();
+ List<String> outputPaths = new ArrayList<String>();
+ for (Output output : entity.getOutputs().getOutputs()) {
+ Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
+ Storage storage = FeedHelper.createStorage(cluster, feed);
+
+ SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, output.getName(), LocationType.DATA);
+ coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);
+
+ DATAOUT dataout = createDataOut(output);
+ coord.getOutputEvents().getDataOut().add(dataout);
+
+ String outputExpr = "${coord:dataOut('" + output.getName() + "')}";
+ outputFeeds.add(feed.getName());
+ outputPaths.add(outputExpr);
+
+ if (storage.getType() == Storage.TYPE.FILESYSTEM) {
+ props.put(output.getName(), outputExpr);
+
+ propagateFileSystemProperties(output, feed, cluster, coord, storage, props);
+ } else if (storage.getType() == Storage.TYPE.TABLE) {
+ propagateCatalogTableProperties(output, (CatalogStorage) storage, props);
+ }
+ }
+
+ // Output feed name and path for parent workflow
+ props.put(ARG.feedNames.getPropName(), StringUtils.join(outputFeeds, ','));
+ props.put(ARG.feedInstancePaths.getPropName(), StringUtils.join(outputPaths, ','));
+ }
+
+ private DATAOUT createDataOut(Output output) {
+ DATAOUT dataout = new DATAOUT();
+ dataout.setName(output.getName());
+ dataout.setDataset(output.getName());
+ dataout.setInstance(getELExpression(output.getInstance()));
+ return dataout;
+ }
+
+ private void propagateFileSystemProperties(Output output, Feed feed, Cluster cluster, COORDINATORAPP coord,
+ Storage storage, Properties props) throws FalconException {
+ // stats and meta paths
+ createOutputEvent(output, feed, cluster, LocationType.STATS, coord, props, storage);
+ createOutputEvent(output, feed, cluster, LocationType.META, coord, props, storage);
+ createOutputEvent(output, feed, cluster, LocationType.TMP, coord, props, storage);
+ }
+
+ //SUSPEND CHECKSTYLE CHECK ParameterNumberCheck
+ private void createOutputEvent(Output output, Feed feed, Cluster cluster, LocationType locType,
+ COORDINATORAPP coord, Properties props, Storage storage) throws FalconException {
+ String name = output.getName();
+ String type = locType.name().toLowerCase();
+
+ SYNCDATASET dataset = createDataSet(feed, cluster, storage, name + type, locType);
+ coord.getDatasets().getDatasetOrAsyncDataset().add(dataset);
+
+ DATAOUT dataout = new DATAOUT();
+ dataout.setName(name + type);
+ dataout.setDataset(name + type);
+ dataout.setInstance(getELExpression(output.getInstance()));
+
+ OUTPUTEVENTS outputEvents = coord.getOutputEvents();
+ if (outputEvents == null) {
+ outputEvents = new OUTPUTEVENTS();
+ coord.setOutputEvents(outputEvents);
+ }
+ outputEvents.getDataOut().add(dataout);
+
+ String outputExpr = "${coord:dataOut('" + name + type + "')}";
+ props.put(name + "." + type, outputExpr);
+ }
+ //RESUME CHECKSTYLE CHECK ParameterNumberCheck
+
+ private void propagateUserWorkflowProperties(Workflow processWorkflow, Properties props) {
+ props.put("userWorkflowName", ProcessHelper.getProcessWorkflowName(
+ processWorkflow.getName(), entity.getName()));
+ props.put("userWorkflowVersion", processWorkflow.getVersion());
+ props.put("userWorkflowEngine", processWorkflow.getEngine().value());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionWorkflowBuilder.java
new file mode 100644
index 0000000..0d9abdb
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessExecutionWorkflowBuilder.java
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.process.Input;
+import org.apache.falcon.entity.v0.process.Output;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.falcon.oozie.workflow.CONFIGURATION;
+import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+/**
+ * Base class for building orchestration workflow for process.
+ */
+public abstract class ProcessExecutionWorkflowBuilder extends OozieOrchestrationWorkflowBuilder<Process> {
+ private static final String DEFAULT_WF_TEMPLATE = "/workflow/process-parent-workflow.xml";
+ private static final Set<String> FALCON_PROCESS_HIVE_ACTIONS = new HashSet<String>(
+ Arrays.asList(new String[]{"recordsize", "user-oozie-workflow", "user-pig-job", "user-hive-job", }));
+
+ protected ProcessExecutionWorkflowBuilder(Process entity) {
+ super(entity, Tag.DEFAULT);
+ }
+
+ @Override public Properties build(Cluster cluster, Path buildPath) throws FalconException {
+ WORKFLOWAPP wfApp = getWorkflow(DEFAULT_WF_TEMPLATE);
+ String wfName = EntityUtil.getWorkflowName(Tag.DEFAULT, entity).toString();
+ wfApp.setName(wfName);
+
+ addLibExtensionsToWorkflow(cluster, wfApp, null);
+
+ final boolean isTableStorageType = isTableStorageType(cluster);
+ if (isTableStorageType) {
+ setupHiveCredentials(cluster, buildPath, wfApp);
+ }
+
+ for (Object object : wfApp.getDecisionOrForkOrJoin()) {
+ if (!(object instanceof ACTION)) {
+ continue;
+ }
+
+ ACTION action = (ACTION) object;
+ String actionName = action.getName();
+ if (FALCON_ACTIONS.contains(actionName)) {
+ decorateWithOozieRetries(action);
+ if (isTableStorageType && actionName.equals("recordsize")) {
+ // adds hive-site.xml in actions classpath
+ action.getJava().setJobXml("${wf:appPath()}/conf/hive-site.xml");
+ }
+ }
+
+ decorateAction(action, cluster, buildPath);
+ }
+
+ //Create parent workflow
+ marshal(cluster, wfApp, buildPath);
+ return getProperties(buildPath, wfName);
+ }
+
+ protected abstract void decorateAction(ACTION action, Cluster cluster, Path buildPath) throws FalconException;
+
+ private void setupHiveCredentials(Cluster cluster, Path buildPath, WORKFLOWAPP wfApp) throws FalconException {
+ // create hive-site.xml file so actions can use it in the classpath
+ createHiveConfiguration(cluster, buildPath, ""); // DO NOT ADD PREFIX!!!
+
+ if (isSecurityEnabled) {
+ // add hcatalog credentials for secure mode and add a reference to each action
+ addHCatalogCredentials(wfApp, cluster, HIVE_CREDENTIAL_NAME, FALCON_PROCESS_HIVE_ACTIONS);
+ }
+ }
+
+ protected void addInputFeedsAsParams(List<String> paramList, Cluster cluster) throws FalconException {
+ if (entity.getInputs() == null) {
+ return;
+ }
+
+ for (Input input : entity.getInputs().getInputs()) {
+ Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
+ Storage storage = FeedHelper.createStorage(cluster, feed);
+
+ final String inputName = input.getName();
+ if (storage.getType() == Storage.TYPE.FILESYSTEM) {
+ paramList.add(inputName + "=${" + inputName + "}"); // no prefix for backwards compatibility
+ } else if (storage.getType() == Storage.TYPE.TABLE) {
+ final String paramName = "falcon_" + inputName; // prefix 'falcon' for new params
+ Properties props = new Properties();
+ propagateCommonCatalogTableProperties((CatalogStorage) storage, props, paramName);
+ for (Object key : props.keySet()) {
+ paramList.add(key + "=${wf:conf('" + key + "')}");
+ }
+
+ paramList.add(paramName + "_filter=${wf:conf('"
+ + paramName + "_partition_filter_" + entity.getWorkflow().getEngine().name().toLowerCase() + "')}");
+ }
+ }
+ }
+
+ protected void addOutputFeedsAsParams(List<String> paramList, Cluster cluster) throws FalconException {
+ if (entity.getOutputs() == null) {
+ return;
+ }
+
+ for (Output output : entity.getOutputs().getOutputs()) {
+ Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
+ Storage storage = FeedHelper.createStorage(cluster, feed);
+
+ if (storage.getType() == Storage.TYPE.FILESYSTEM) {
+ final String outputName = output.getName(); // no prefix for backwards compatibility
+ paramList.add(outputName + "=${" + outputName + "}");
+ } else if (storage.getType() == Storage.TYPE.TABLE) {
+ Properties props = new Properties();
+ propagateCatalogTableProperties(output, (CatalogStorage) storage, props); // prefix is auto added
+ for (Object key : props.keySet()) {
+ paramList.add(key + "=${wf:conf('" + key + "')}");
+ }
+
+ final String paramName = "falcon_" + output.getName(); // prefix 'falcon' for new params
+ paramList.add(paramName + "_partitions=${wf:conf('"
+ + paramName + "_partitions_" + entity.getWorkflow().getEngine().name().toLowerCase() + "')}");
+ }
+ }
+ }
+
+ protected void propagateEntityProperties(CONFIGURATION conf, List<String> paramList) {
+ Properties entityProperties = getEntityProperties(entity);
+
+ // Propagate user defined properties to job configuration
+ final List<org.apache.falcon.oozie.workflow.CONFIGURATION.Property> configuration = conf.getProperty();
+
+ // Propagate user defined properties to pig script as macros
+ // passed as parameters -p name=value that can be accessed as $name
+ for (Entry<Object, Object> entry: entityProperties.entrySet()) {
+ org.apache.falcon.oozie.workflow.CONFIGURATION.Property configProperty =
+ new org.apache.falcon.oozie.workflow.CONFIGURATION.Property();
+ configProperty.setName((String) entry.getKey());
+ configProperty.setValue((String) entry.getValue());
+ configuration.add(configProperty);
+
+ paramList.add(entry.getKey() + "=" + entry.getValue());
+ }
+ }
+
+ protected List<String> getPrepareDeleteOutputPathList() throws FalconException {
+ final List<String> deleteList = new ArrayList<String>();
+ if (entity.getOutputs() == null) {
+ return deleteList;
+ }
+
+ for (Output output : entity.getOutputs().getOutputs()) {
+ Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
+
+ if (FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
+ continue; // prepare delete only applies to FileSystem storage
+ }
+
+ deleteList.add("${wf:conf('" + output.getName() + "')}");
+ }
+
+ return deleteList;
+ }
+
+ protected void addArchiveForCustomJars(Cluster cluster, List<String> archiveList,
+ Path libPath) throws FalconException {
+ if (libPath == null) {
+ return;
+ }
+
+ try {
+ final FileSystem fs = libPath.getFileSystem(ClusterHelper.getConfiguration(cluster));
+ if (fs.isFile(libPath)) { // File, not a Dir
+ archiveList.add(libPath.toString());
+ return;
+ }
+
+ // lib path is a directory, add each file under the lib dir to archive
+ final FileStatus[] fileStatuses = fs.listStatus(libPath, new PathFilter() {
+ @Override
+ public boolean accept(Path path) {
+ try {
+ return fs.isFile(path) && path.getName().endsWith(".jar");
+ } catch (IOException ignore) {
+ return false;
+ }
+ }
+ });
+
+ for (FileStatus fileStatus : fileStatuses) {
+ archiveList.add(fileStatus.getPath().toString());
+ }
+ } catch (IOException e) {
+ throw new FalconException("Error adding archive for custom jars under: " + libPath, e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/workflow/OozieWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/workflow/OozieWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/workflow/OozieWorkflowBuilder.java
deleted file mode 100644
index a0406e6..0000000
--- a/oozie/src/main/java/org/apache/falcon/workflow/OozieWorkflowBuilder.java
+++ /dev/null
@@ -1,636 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.falcon.workflow;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.falcon.FalconException;
-import org.apache.falcon.FalconRuntimException;
-import org.apache.falcon.Tag;
-import org.apache.falcon.entity.ClusterHelper;
-import org.apache.falcon.entity.EntityUtil;
-import org.apache.falcon.entity.ExternalId;
-import org.apache.falcon.entity.store.ConfigurationStore;
-import org.apache.falcon.entity.v0.Entity;
-import org.apache.falcon.entity.v0.EntityType;
-import org.apache.falcon.entity.v0.cluster.Cluster;
-import org.apache.falcon.entity.v0.cluster.Property;
-import org.apache.falcon.hadoop.HadoopClientFactory;
-import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
-import org.apache.falcon.oozie.bundle.BUNDLEAPP;
-import org.apache.falcon.oozie.bundle.COORDINATOR;
-import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
-import org.apache.falcon.oozie.coordinator.ObjectFactory;
-import org.apache.falcon.oozie.workflow.ACTION;
-import org.apache.falcon.oozie.workflow.CREDENTIAL;
-import org.apache.falcon.oozie.workflow.CREDENTIALS;
-import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
-import org.apache.falcon.security.SecurityUtil;
-import org.apache.falcon.service.FalconPathFilter;
-import org.apache.falcon.service.SharedLibraryHostingService;
-import org.apache.falcon.util.OozieUtils;
-import org.apache.falcon.util.RuntimeProperties;
-import org.apache.falcon.util.StartupProperties;
-import org.apache.falcon.workflow.engine.OozieWorkflowEngine;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.oozie.client.OozieClient;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.JAXBException;
-import javax.xml.bind.Marshaller;
-import javax.xml.bind.Unmarshaller;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.StringWriter;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-import java.util.Set;
-
-/**
- * Base workflow builder for falcon entities.
- * @param <T>
- */
-public abstract class OozieWorkflowBuilder<T extends Entity> extends WorkflowBuilder<T> {
-
- private static final Logger LOG = LoggerFactory.getLogger(OozieWorkflowBuilder.class);
- protected static final ConfigurationStore CONFIG_STORE = ConfigurationStore.get();
-
- protected static final String NOMINAL_TIME_EL = "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}";
- protected static final String ACTUAL_TIME_EL = "${coord:formatTime(coord:actualTime(), 'yyyy-MM-dd-HH-mm')}";
-
- protected static final Long DEFAULT_BROKER_MSG_TTL = 3 * 24 * 60L;
- protected static final String MR_QUEUE_NAME = "queueName";
- protected static final String MR_JOB_PRIORITY = "jobPriority";
-
- protected static final String HIVE_CREDENTIAL_NAME = "falconHiveAuth";
-
- public static final String METASTOREURIS = "hive.metastore.uris";
- public static final String METASTORE_KERBEROS_PRINCIPAL = "hive.metastore.kerberos.principal";
- public static final String METASTORE_USE_THRIFT_SASL = "hive.metastore.sasl.enabled";
-
- protected static final String IGNORE = "IGNORE";
-
- public static final Set<String> FALCON_ACTIONS = new HashSet<String>(
- Arrays.asList(new String[]{"recordsize", "succeeded-post-processing", "failed-post-processing", }));
-
- protected static final FalconPathFilter FALCON_JAR_FILTER = new FalconPathFilter() {
- @Override
- public boolean accept(Path path) {
- return path.getName().startsWith("falcon");
- }
-
- @Override
- public String getJarName(Path path) {
- String name = path.getName();
- if (name.endsWith(".jar")) {
- name = name.substring(0, name.indexOf(".jar"));
- }
- return name;
- }
- };
-
- protected final boolean isSecurityEnabled;
-
- protected OozieWorkflowBuilder(T entity) {
- super(entity);
- isSecurityEnabled = SecurityUtil.isSecurityEnabled();
- }
-
- protected Path getCoordPath(Path bundlePath, String coordName) {
- Tag tag = EntityUtil.getWorkflowNameTag(coordName, entity);
- return new Path(bundlePath, tag.name());
- }
-
- protected abstract Map<String, String> getEntityProperties();
-
- public boolean map(Cluster cluster, Path bundlePath) throws FalconException {
- BUNDLEAPP bundleApp = new BUNDLEAPP();
- bundleApp.setName(EntityUtil.getWorkflowName(entity).toString());
- // all the properties are set prior to bundle and coordinators creation
-
- List<COORDINATORAPP> coordinators = getCoordinators(cluster, bundlePath);
- if (coordinators.size() == 0) {
- return false;
- }
-
- for (COORDINATORAPP coordinatorapp : coordinators) {
- Path coordPath = getCoordPath(bundlePath, coordinatorapp.getName());
- String coordXmlName = marshal(cluster, coordinatorapp, coordPath,
- EntityUtil.getWorkflowNameSuffix(coordinatorapp.getName(), entity));
-
- // copy falcon libs to the workflow dir
- copySharedLibs(cluster, coordinatorapp);
-
- // add the coordinator to the bundle
- COORDINATOR bundleCoord = new COORDINATOR();
- bundleCoord.setName(coordinatorapp.getName());
- bundleCoord.setAppPath(getStoragePath(coordPath) + "/" + coordXmlName);
- bundleApp.getCoordinator().add(bundleCoord);
- }
-
- // create logs dir once since its at the root of the bundle path
- createLogsDir(cluster);
-
- marshal(cluster, bundleApp, bundlePath); // write the bundle
- return true;
- }
-
- private void addExtensionJars(FileSystem fs, Path path, WORKFLOWAPP wf) throws IOException {
- FileStatus[] libs = null;
- try {
- libs = fs.listStatus(path);
- } catch(FileNotFoundException ignore) {
- //Ok if the libext is not configured
- }
-
- if (libs == null) {
- return;
- }
-
- for(FileStatus lib : libs) {
- if (lib.isDir()) {
- continue;
- }
-
- for(Object obj: wf.getDecisionOrForkOrJoin()) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
- ACTION action = (ACTION) obj;
- List<String> files = null;
- if (action.getJava() != null) {
- files = action.getJava().getFile();
- } else if (action.getPig() != null) {
- files = action.getPig().getFile();
- } else if (action.getMapReduce() != null) {
- files = action.getMapReduce().getFile();
- }
- if (files != null) {
- files.add(lib.getPath().toString());
- }
- }
- }
- }
-
- protected void addLibExtensionsToWorkflow(Cluster cluster, WORKFLOWAPP wf, EntityType type, String lifecycle)
- throws IOException, FalconException {
- String libext = ClusterHelper.getLocation(cluster, "working") + "/libext";
- FileSystem fs = HadoopClientFactory.get().createFileSystem(
- ClusterHelper.getConfiguration(cluster));
- addExtensionJars(fs, new Path(libext), wf);
- addExtensionJars(fs, new Path(libext, type.name()), wf);
- if (StringUtils.isNotEmpty(lifecycle)) {
- addExtensionJars(fs, new Path(libext, type.name() + "/" + lifecycle), wf);
- }
- }
-
- private void copySharedLibs(Cluster cluster, COORDINATORAPP coordinatorapp) throws FalconException {
- try {
- String coordPath = coordinatorapp.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- Path libPath = new Path(coordPath, "lib");
- SharedLibraryHostingService.pushLibsToHDFS(StartupProperties.get().getProperty("system.lib.location"),
- libPath, cluster, FALCON_JAR_FILTER);
- } catch (IOException e) {
- throw new FalconException("Failed to copy shared libs on cluster " + cluster.getName(), e);
- }
- }
-
- public abstract List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException;
-
- protected org.apache.falcon.oozie.coordinator.CONFIGURATION getCoordConfig(Map<String, String> propMap) {
- org.apache.falcon.oozie.coordinator.CONFIGURATION conf
- = new org.apache.falcon.oozie.coordinator.CONFIGURATION();
- List<org.apache.falcon.oozie.coordinator.CONFIGURATION.Property> props = conf.getProperty();
- for (Entry<String, String> prop : propMap.entrySet()) {
- props.add(createCoordProperty(prop.getKey(), prop.getValue()));
- }
- return conf;
- }
-
- protected Map<String, String> createCoordDefaultConfiguration(Cluster cluster, String coordName) {
- Map<String, String> props = new HashMap<String, String>();
- props.put(ARG.entityName.getPropName(), entity.getName());
- props.put(ARG.entityType.getPropName(), entity.getEntityType().name());
- props.put(ARG.nominalTime.getPropName(), NOMINAL_TIME_EL);
- props.put(ARG.timeStamp.getPropName(), ACTUAL_TIME_EL);
-
- addBrokerProperties(cluster, props);
-
- props.put(OozieClient.EXTERNAL_ID,
- new ExternalId(entity.getName(), EntityUtil.getWorkflowNameTag(coordName, entity),
- "${coord:nominalTime()}").getId());
- props.put("workflowEngineUrl", ClusterHelper.getOozieUrl(cluster));
-
- addLateDataProperties(props);
-
- addClusterProperties(cluster, props);
-
- props.put(MR_QUEUE_NAME, "default");
- props.put(MR_JOB_PRIORITY, "NORMAL");
-
- //props in entity override the set props.
- props.putAll(getEntityProperties());
-
- // this cannot be overridden
- props.put("logDir", getStoragePath(EntityUtil.getLogPath(cluster, entity)));
-
- return props;
- }
-
- private void addBrokerProperties(Cluster cluster, Map<String, String> props) {
- props.put("userBrokerUrl", ClusterHelper.getMessageBrokerUrl(cluster));
- props.put("userBrokerImplClass", ClusterHelper.getMessageBrokerImplClass(cluster));
-
- String falconBrokerUrl = StartupProperties.get().getProperty(
- ARG.brokerUrl.getPropName(), "tcp://localhost:61616?daemon=true");
- props.put(ARG.brokerUrl.getPropName(), falconBrokerUrl);
-
- String falconBrokerImplClass = StartupProperties.get().getProperty(
- ARG.brokerImplClass.getPropName(), ClusterHelper.DEFAULT_BROKER_IMPL_CLASS);
- props.put(ARG.brokerImplClass.getPropName(), falconBrokerImplClass);
-
- String jmsMessageTTL = StartupProperties.get().getProperty(
- "broker.ttlInMins", DEFAULT_BROKER_MSG_TTL.toString());
- props.put(ARG.brokerTTL.getPropName(), jmsMessageTTL);
- }
-
- private void addLateDataProperties(Map<String, String> props) {
- try {
- if (EntityUtil.getLateProcess(entity) == null
- || EntityUtil.getLateProcess(entity).getLateInputs() == null
- || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
- props.put("shouldRecord", "false");
- } else {
- props.put("shouldRecord", "true");
- }
- } catch (FalconException e) {
- LOG.error("Unable to get Late Process for entity: {}", entity, e);
- throw new FalconRuntimException(e);
- }
- }
-
- private void addClusterProperties(Cluster cluster, Map<String, String> props) {
- props.put(ARG.cluster.getPropName(), cluster.getName());
-
- if (cluster.getProperties() != null) {
- for (Property prop : cluster.getProperties().getProperties()) {
- props.put(prop.getName(), prop.getValue());
- }
- }
- }
-
- protected org.apache.falcon.oozie.coordinator.CONFIGURATION.Property createCoordProperty(String name,
- String value) {
- org.apache.falcon.oozie.coordinator.CONFIGURATION.Property prop
- = new org.apache.falcon.oozie.coordinator.CONFIGURATION.Property();
- prop.setName(name);
- prop.setValue(value);
- return prop;
- }
-
- protected void marshal(Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath)
- throws FalconException {
- try {
- Marshaller marshaller = jaxbContext.createMarshaller();
- marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
- FileSystem fs = HadoopClientFactory.get().createFileSystem(
- outPath.toUri(), ClusterHelper.getConfiguration(cluster));
- OutputStream out = fs.create(outPath);
- try {
- marshaller.marshal(jaxbElement, out);
- } finally {
- out.close();
- }
- if (LOG.isDebugEnabled()) {
- StringWriter writer = new StringWriter();
- marshaller.marshal(jaxbElement, writer);
- LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName());
- LOG.debug(writer.getBuffer().toString());
- }
-
- LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath);
- } catch (Exception e) {
- throw new FalconException("Unable to marshall app object", e);
- }
- }
-
- private void createLogsDir(Cluster cluster) throws FalconException {
- Path logsDir = EntityUtil.getLogPath(cluster, entity);
- try {
- FileSystem fs = HadoopClientFactory.get().createFileSystem(
- ClusterHelper.getConfiguration(cluster));
- if (fs.exists(logsDir)) {
- return;
- }
-
- fs.mkdirs(logsDir);
-
- // logs are copied with in oozie as the user in Post Processing and hence 777 permissions
- FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
- fs.setPermission(logsDir, permission);
- } catch (Exception e) {
- throw new FalconException("Unable to create logs dir at: " + logsDir, e);
- }
- }
-
- protected String marshal(Cluster cluster, COORDINATORAPP coord, Path outPath,
- String name) throws FalconException {
- name = (StringUtils.isEmpty(name) ? "coordinator" : name) + ".xml";
- marshal(cluster, new ObjectFactory().createCoordinatorApp(coord),
- OozieUtils.COORD_JAXB_CONTEXT, new Path(outPath, name));
- return name;
- }
-
- protected void marshal(Cluster cluster, BUNDLEAPP bundle, Path outPath) throws FalconException {
- marshal(cluster, new org.apache.falcon.oozie.bundle.ObjectFactory().createBundleApp(bundle),
- OozieUtils.BUNDLE_JAXB_CONTEXT, new Path(outPath, "bundle.xml"));
- }
-
- protected void marshal(Cluster cluster, WORKFLOWAPP workflow, Path outPath) throws FalconException {
- marshal(cluster, new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflow),
- OozieUtils.WORKFLOW_JAXB_CONTEXT, new Path(outPath, "workflow.xml"));
- }
-
- protected String getStoragePath(Path path) {
- if (path != null) {
- return getStoragePath(path.toString());
- }
- return null;
- }
-
- protected String getStoragePath(String path) {
- if (StringUtils.isNotEmpty(path)) {
- if (new Path(path).toUri().getScheme() == null) {
- path = "${nameNode}" + path;
- }
- }
- return path;
- }
-
- protected WORKFLOWAPP getWorkflowTemplate(String template) throws FalconException {
- InputStream resourceAsStream = null;
- try {
- resourceAsStream = OozieWorkflowBuilder.class.getResourceAsStream(template);
- Unmarshaller unmarshaller = OozieUtils.WORKFLOW_JAXB_CONTEXT.createUnmarshaller();
- @SuppressWarnings("unchecked")
- JAXBElement<WORKFLOWAPP> jaxbElement = (JAXBElement<WORKFLOWAPP>) unmarshaller.unmarshal(
- resourceAsStream);
- return jaxbElement.getValue();
- } catch (JAXBException e) {
- throw new FalconException(e);
- } finally {
- IOUtils.closeQuietly(resourceAsStream);
- }
- }
-
- protected COORDINATORAPP getCoordinatorTemplate(String template) throws FalconException {
- InputStream resourceAsStream = null;
- try {
- resourceAsStream = OozieWorkflowBuilder.class.getResourceAsStream(template);
- Unmarshaller unmarshaller = OozieUtils.COORD_JAXB_CONTEXT.createUnmarshaller();
- @SuppressWarnings("unchecked")
- JAXBElement<COORDINATORAPP> jaxbElement = (JAXBElement<COORDINATORAPP>)
- unmarshaller.unmarshal(resourceAsStream);
- return jaxbElement.getValue();
- } catch (JAXBException e) {
- throw new FalconException(e);
- } finally {
- IOUtils.closeQuietly(resourceAsStream);
- }
- }
-
- // creates hive-site.xml configuration in conf dir for the given cluster on the same cluster.
- protected void createHiveConfiguration(Cluster cluster, Path workflowPath,
- String prefix) throws FalconException {
- Configuration hiveConf = getHiveCredentialsAsConf(cluster);
-
- try {
- Configuration conf = ClusterHelper.getConfiguration(cluster);
- FileSystem fs = HadoopClientFactory.get().createFileSystem(conf);
-
- // create hive conf to stagingDir
- Path confPath = new Path(workflowPath + "/conf");
-
- persistHiveConfiguration(fs, confPath, hiveConf, prefix);
- } catch (IOException e) {
- throw new FalconException("Unable to create create hive site", e);
- }
- }
-
- protected void persistHiveConfiguration(FileSystem fs, Path confPath,
- Cluster cluster, String prefix) throws IOException {
- Configuration hiveConf = getHiveCredentialsAsConf(cluster);
- persistHiveConfiguration(fs, confPath, hiveConf, prefix);
- }
-
- private void persistHiveConfiguration(FileSystem fs, Path confPath, Configuration hiveConf,
- String prefix) throws IOException {
- OutputStream out = null;
- try {
- out = fs.create(new Path(confPath, prefix + "hive-site.xml"));
- hiveConf.writeXml(out);
- } finally {
- IOUtils.closeQuietly(out);
- }
- }
-
- private Configuration getHiveCredentialsAsConf(Cluster cluster) {
- Map<String, String> hiveCredentials = getHiveCredentials(cluster);
-
- Configuration hiveConf = new Configuration(false);
- for (Entry<String, String> entry : hiveCredentials.entrySet()) {
- hiveConf.set(entry.getKey(), entry.getValue());
- }
-
- return hiveConf;
- }
-
- private Map<String, String> getHiveCredentials(Cluster cluster) {
- Map<String, String> hiveCredentials = new HashMap<String, String>();
-
- String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
- if (metaStoreUrl == null) {
- throw new IllegalStateException(
- "Registry interface is not defined in cluster: " + cluster.getName());
- }
-
- // Propagate the hive properties from cluster entity
- Map<String, String> hiveProperties = ClusterHelper.geHiveProperties(cluster);
- if (hiveProperties != null && !hiveProperties.isEmpty()) {
- hiveCredentials.putAll(hiveProperties);
- }
-
- hiveCredentials.put(METASTOREURIS, metaStoreUrl);
- hiveCredentials.put("hive.metastore.execute.setugi", "true");
- hiveCredentials.put("hcatNode", metaStoreUrl.replace("thrift", "hcat"));
- hiveCredentials.put("hcat.metastore.uri", metaStoreUrl);
-
- if (isSecurityEnabled) {
- String principal = ClusterHelper
- .getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL);
- hiveCredentials.put(METASTORE_KERBEROS_PRINCIPAL, principal);
- hiveCredentials.put(METASTORE_USE_THRIFT_SASL, "true");
- hiveCredentials.put("hcat.metastore.principal", principal);
- }
-
- return hiveCredentials;
- }
-
- /**
- * This is only necessary if table is involved and is secure mode.
- *
- * @param cluster cluster entity
- * @param credentialName credential name
- * @return CREDENTIALS object
- */
- protected CREDENTIAL createHCatalogCredential(Cluster cluster, String credentialName) {
- final String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
-
- CREDENTIAL credential = new CREDENTIAL();
- credential.setName(credentialName);
- credential.setType("hcat");
-
- credential.getProperty().add(createProperty("hcat.metastore.uri", metaStoreUrl));
- credential.getProperty().add(createProperty("hcat.metastore.principal",
- ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL)));
-
- return credential;
- }
-
- private CREDENTIAL.Property createProperty(String name, String value) {
- CREDENTIAL.Property property = new CREDENTIAL.Property();
- property.setName(name);
- property.setValue(value);
- return property;
- }
-
- /**
- * This is only necessary if table is involved and is secure mode.
- *
- * @param workflowApp workflow xml
- * @param cluster cluster entity
- */
- protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster,
- String credentialName) {
- CREDENTIALS credentials = workflowApp.getCredentials();
- if (credentials == null) {
- credentials = new CREDENTIALS();
- }
-
- credentials.getCredential().add(createHCatalogCredential(cluster, credentialName));
-
- // add credential for workflow
- workflowApp.setCredentials(credentials);
- }
-
- /**
- * This is only necessary if table is involved and is secure mode.
- *
- * @param workflowApp workflow xml
- * @param cluster cluster entity
- */
- protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster,
- String credentialName, Set<String> actions) {
- addHCatalogCredentials(workflowApp, cluster, credentialName);
-
- // add credential to each action
- for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
- if (!(object instanceof ACTION)) {
- continue;
- }
-
- ACTION action = (ACTION) object;
- String actionName = action.getName();
- if (actions.contains(actionName)) {
- action.setCred(credentialName);
- }
- }
- }
-
- protected abstract boolean shouldSetupHiveConfiguration(Cluster cluster,
- T entity) throws FalconException;
-
- protected void decorateWithOozieRetries(ACTION action) {
- Properties props = RuntimeProperties.get();
- action.setRetryMax(props.getProperty("falcon.parentworkflow.retry.max", "3"));
- action.setRetryInterval(props.getProperty("falcon.parentworkflow.retry.interval.secs", "1"));
- }
-
- protected Properties createAppProperties(String clusterName, Path bundlePath, String user) throws FalconException {
- Cluster cluster = EntityUtil.getEntity(EntityType.CLUSTER, clusterName);
- Properties properties = new Properties();
- if (cluster.getProperties() != null) {
- addClusterProperties(properties, cluster.getProperties().getProperties());
- }
- properties.setProperty(OozieWorkflowEngine.NAME_NODE, ClusterHelper.getStorageUrl(cluster));
- properties.setProperty(OozieWorkflowEngine.JOB_TRACKER, ClusterHelper.getMREndPoint(cluster));
- properties.setProperty(OozieClient.BUNDLE_APP_PATH,
- "${" + OozieWorkflowEngine.NAME_NODE + "}" + bundlePath.toString());
- properties.setProperty("colo.name", cluster.getColo());
-
- properties.setProperty(OozieClient.USER_NAME, user);
- properties.setProperty(OozieClient.USE_SYSTEM_LIBPATH, "true");
- properties.setProperty("falcon.libpath", ClusterHelper.getLocation(cluster, "working") + "/lib");
-
- if (shouldSetupHiveConfiguration(cluster, entity)) {
- propagateHiveCredentials(cluster, properties);
- }
-
- LOG.info("Cluster: {}, PROPS: {}", cluster.getName(), properties);
- return properties;
- }
-
- private void addClusterProperties(Properties properties, List<Property> clusterProperties) {
- for (Property prop : clusterProperties) {
- properties.setProperty(prop.getName(), prop.getValue());
- }
- }
-
- /**
- * This method propagates hive credentials for coordinator to authenticate against hive
- * for data availability triggers.
- *
- * @param cluster cluster entity
- * @param properties property object
- */
- private void propagateHiveCredentials(Cluster cluster, Properties properties) {
- Map<String, String> hiveCredentials = getHiveCredentials(cluster);
- for (Entry<String, String> entry : hiveCredentials.entrySet()) {
- properties.setProperty(entry.getKey(), entry.getValue());
- }
- }
-
- public abstract Date getNextStartTime(T entity, String cluster, Date now) throws FalconException;
-}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/workflow/engine/OozieWorkflowEngine.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/workflow/engine/OozieWorkflowEngine.java b/oozie/src/main/java/org/apache/falcon/workflow/engine/OozieWorkflowEngine.java
index c65bed9..38be792 100644
--- a/oozie/src/main/java/org/apache/falcon/workflow/engine/OozieWorkflowEngine.java
+++ b/oozie/src/main/java/org/apache/falcon/workflow/engine/OozieWorkflowEngine.java
@@ -32,6 +32,7 @@ import org.apache.falcon.entity.v0.Frequency.TimeUnit;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.falcon.oozie.OozieEntityBuilder;
import org.apache.falcon.resource.APIResult;
import org.apache.falcon.resource.InstancesResult;
import org.apache.falcon.resource.InstancesResult.Instance;
@@ -42,8 +43,6 @@ import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.update.UpdateHelper;
import org.apache.falcon.util.OozieUtils;
import org.apache.falcon.util.RuntimeProperties;
-import org.apache.falcon.workflow.OozieWorkflowBuilder;
-import org.apache.falcon.workflow.WorkflowBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -142,22 +141,20 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
}
if (!schedClusters.isEmpty()) {
- WorkflowBuilder<Entity> builder = WorkflowBuilder.getBuilder(ENGINE, entity);
- Map<String, Properties> newFlows =
- builder.newWorkflowSchedule(schedClusters.toArray(new String[schedClusters.size()]));
- for (Map.Entry<String, Properties> entry : newFlows.entrySet()) {
- String cluster = entry.getKey();
- LOG.info("Scheduling {} on cluster {}", entity.toShortString(), cluster);
- scheduleEntity(cluster, entry.getValue(), entity);
- commitStagingPath(cluster, entry.getValue().getProperty(OozieClient.BUNDLE_APP_PATH));
+ OozieEntityBuilder builder = OozieEntityBuilder.get(entity);
+ for (String clusterName: schedClusters) {
+ Cluster cluster = ConfigurationStore.get().get(EntityType.CLUSTER, clusterName);
+ LOG.info("Scheduling {} on cluster {}", entity.toShortString(), clusterName);
+ Path buildPath = EntityUtil.getNewStagingPath(cluster, entity);
+ Properties properties = builder.build(cluster, buildPath);
+ scheduleEntity(clusterName, properties, entity);
+ commitStagingPath(cluster, buildPath);
}
}
}
- private void commitStagingPath(String cluster, String path) throws FalconException {
- path = StringUtils.removeStart(path, "${nameNode}");
- Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
- FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(clusterEntity));
+ private void commitStagingPath(Cluster cluster, Path path) throws FalconException {
+ FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
try {
fs.create(new Path(path, EntityUtil.SUCCEEDED_FILE_NAME)).close();
} catch (IOException e) {
@@ -405,21 +402,13 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
@Override
public InstancesResult getRunningInstances(Entity entity, List<LifeCycle> lifeCycles) throws FalconException {
try {
- WorkflowBuilder<Entity> builder = WorkflowBuilder.getBuilder(ENGINE, entity);
Set<String> clusters = EntityUtil.getClustersDefinedInColos(entity);
List<Instance> runInstances = new ArrayList<Instance>();
- String[] wfNames = builder.getWorkflowNames();
- List<String> coordNames = new ArrayList<String>();
- for (String wfName : wfNames) {
- if (!isCoordApplicable(wfName, lifeCycles)) {
- continue;
- }
- coordNames.add(wfName);
- }
for (String cluster : clusters) {
ProxyOozieClient client = OozieClientFactory.get(cluster);
- List<WorkflowJob> wfs = getRunningWorkflows(cluster, coordNames);
+ List<String> wfNames = EntityUtil.getWorkflowNames(entity, cluster);
+ List<WorkflowJob> wfs = getRunningWorkflows(cluster, wfNames);
if (wfs != null) {
for (WorkflowJob job : wfs) {
WorkflowJob wf = client.getJobInfo(job.getId());
@@ -958,8 +947,8 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
LOG.info("Updating entity through Workflow Engine {}", newEntity.toShortString());
Date newEndTime = EntityUtil.getEndTime(newEntity, cluster);
if (newEndTime.before(now())) {
- throw new FalconException("New end time for " + newEntity.getName() + " is past current time. Entity "
- + "can't be updated. Use remove and add");
+ throw new FalconException("Entity's end time " + SchemaHelper.formatDateUTC(newEndTime)
+ + " is before current time. Entity can't be updated. Use remove and add");
}
LOG.debug("Updating for cluster: {}, bundle: {}", cluster, bundle.getId());
@@ -974,8 +963,8 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
LOG.debug("Going to update! : {} for cluster {}, bundle: {}",
newEntity.toShortString(), cluster, bundle.getId());
- effectiveTime = updateInternal(oldEntity, newEntity, cluster, bundle,
- false, effectiveTime, CurrentUser.getUser());
+ effectiveTime = updateInternal(oldEntity, newEntity, clusterEntity, bundle, false, effectiveTime,
+ CurrentUser.getUser());
LOG.info("Entity update complete: {} for cluster {}, bundle: {}",
newEntity.toShortString(), cluster, bundle.getId());
}
@@ -999,8 +988,8 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
LOG.info("Triggering update for {}, {}", cluster, affectedProcBundle.getId());
- Date depEndTime = updateInternal(affectedEntity, affectedEntity, cluster,
- affectedProcBundle, false, effectiveTime, affectedProcBundle.getUser());
+ Date depEndTime = updateInternal(affectedEntity, affectedEntity, clusterEntity, affectedProcBundle,
+ false, effectiveTime, affectedProcBundle.getUser());
if (effectiveTime == null || effectiveTime.after(depEndTime)) {
effectiveTime = depEndTime;
}
@@ -1091,20 +1080,17 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
}
}
- private Date updateInternal(Entity oldEntity, Entity newEntity, String cluster,
- BundleJob oldBundle, boolean alreadyCreated, Date inEffectiveTime,
- String user) throws FalconException {
- OozieWorkflowBuilder<Entity> builder =
- (OozieWorkflowBuilder<Entity>) WorkflowBuilder.getBuilder(ENGINE, oldEntity);
-
+ private Date updateInternal(Entity oldEntity, Entity newEntity, Cluster cluster, BundleJob oldBundle,
+ boolean alreadyCreated, Date inEffectiveTime, String user) throws FalconException {
Job.Status oldBundleStatus = oldBundle.getStatus();
+ String clusterName = cluster.getName();
+
//Suspend coords as bundle suspend doesn't suspend coords synchronously
- suspendCoords(cluster, oldBundle);
+ suspendCoords(clusterName, oldBundle);
- Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
- Path stagingPath = EntityUtil.getLatestStagingPath(clusterEntity, oldEntity);
+ Path stagingPath = EntityUtil.getLatestStagingPath(cluster, oldEntity);
//find last scheduled bundle
- BundleJob latestBundle = findBundleForStagingPath(cluster, oldEntity, stagingPath);
+ BundleJob latestBundle = findBundleForStagingPath(clusterName, oldEntity, stagingPath);
Date effectiveTime;
if (oldBundle.getAppPath().endsWith(stagingPath.toUri().getPath())
|| latestBundle == null || !alreadyCreated) {
@@ -1121,13 +1107,13 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
}
//pick start time for new bundle which is after effectiveTime
- effectiveTime = builder.getNextStartTime(newEntity, cluster, effectiveTime);
+ effectiveTime = EntityUtil.getNextStartTime(newEntity, cluster, effectiveTime);
//schedule new bundle
String newBundleId = scheduleForUpdate(newEntity, cluster, effectiveTime, user);
//newBundleId and latestBundle will be null if effectiveTime = process end time
if (newBundleId != null) {
- latestBundle = getBundleInfo(cluster, newBundleId);
+ latestBundle = getBundleInfo(clusterName, newBundleId);
LOG.info("New bundle {} scheduled successfully with start time {}",
newBundleId, SchemaHelper.formatDateUTC(effectiveTime));
}
@@ -1144,37 +1130,37 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
}
if (effectiveTime != null) {
//set endtime for old coords
- updateCoords(cluster, oldBundle, EntityUtil.getParallel(oldEntity), effectiveTime);
+ updateCoords(clusterName, oldBundle, EntityUtil.getParallel(oldEntity), effectiveTime);
}
if (oldBundleStatus != Job.Status.SUSPENDED
&& oldBundleStatus != Job.Status.PREPSUSPENDED) {
//resume coords
- resumeCoords(cluster, oldBundle);
+ resumeCoords(clusterName, oldBundle);
}
//latestBundle will be null if effectiveTime = process end time
if (latestBundle != null) {
//create _SUCCESS in staging path to mark update is complete(to handle roll-forward for updates)
- commitStagingPath(cluster, latestBundle.getAppPath());
+ commitStagingPath(cluster, new Path(latestBundle.getAppPath()));
}
return effectiveTime;
}
- private String scheduleForUpdate(Entity entity, String cluster, Date startDate, String user)
+ private String scheduleForUpdate(Entity entity, Cluster cluster, Date startDate, String user)
throws FalconException {
Entity clone = entity.copy();
String currentUser = CurrentUser.getUser();
switchUser(user);
try {
- EntityUtil.setStartDate(clone, cluster, startDate);
- WorkflowBuilder<Entity> builder = WorkflowBuilder.getBuilder(ENGINE, clone);
- Map<String, Properties> bundleProps = builder.newWorkflowSchedule(cluster);
- LOG.info("Scheduling {} on cluster {} with props {}",
- entity.toShortString(), cluster, bundleProps);
- if (bundleProps != null && bundleProps.size() > 0) {
- return scheduleEntity(cluster, bundleProps.get(cluster), entity);
+ EntityUtil.setStartDate(clone, cluster.getName(), startDate);
+ Path buildPath = EntityUtil.getNewStagingPath(cluster, clone);
+ OozieEntityBuilder builder = OozieEntityBuilder.get(clone);
+ Properties properties = builder.build(cluster, buildPath);
+ if (properties != null) {
+ LOG.info("Scheduling {} on cluster {} with props {}", entity.toShortString(), cluster, properties);
+ return scheduleEntity(cluster.getName(), properties, entity);
} else {
LOG.info("No new workflow to be scheduled for this " + entity.toShortString());
return null;
@@ -1210,7 +1196,8 @@ public class OozieWorkflowEngine extends AbstractWorkflowEngine {
}
}
- private List<WorkflowJob> getRunningWorkflows(String cluster, List<String> wfNames) throws FalconException {
+ private List<WorkflowJob> getRunningWorkflows(String cluster, List<String> wfNames) throws
+ FalconException {
StringBuilder filter = new StringBuilder();
filter.append(OozieClient.FILTER_STATUS).append('=').append(Job.Status.RUNNING.name());
for (String wfName : wfNames) {
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/resources/coordinator/replication-coordinator.xml
----------------------------------------------------------------------
diff --git a/oozie/src/main/resources/coordinator/replication-coordinator.xml b/oozie/src/main/resources/coordinator/replication-coordinator.xml
new file mode 100644
index 0000000..693b0bd
--- /dev/null
+++ b/oozie/src/main/resources/coordinator/replication-coordinator.xml
@@ -0,0 +1,51 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<coordinator-app name="#NAME#" frequency="#FREQUENCY#"
+ start="#START_TIME#" end="#END_TIME#" timezone="#TIMEZONE#"
+ xmlns="uri:oozie:coordinator:0.3">
+ <controls>
+ <concurrency>1</concurrency>
+ <execution>FIFO</execution>
+ </controls>
+ <datasets>
+ <dataset name="input-dataset" frequency="#FEED_FREQ#"
+ initial-instance="#START_TIME#" timezone="#TIMEZONE#">
+ <uri-template>#FEED_PATH#</uri-template>
+ </dataset>
+ <dataset name="output-dataset" frequency="#FEED_FREQ#"
+ initial-instance="#START_TIME#" timezone="#TIMEZONE#">
+ <uri-template>#FEED_PATH#</uri-template>
+ </dataset>
+ </datasets>
+ <input-events>
+ <data-in name="input" dataset="input-dataset">
+ <instance>${coord:current(0)}</instance>
+ </data-in>
+ </input-events>
+ <output-events>
+ <data-out name="output" dataset="output-dataset">
+ <instance>${coord:current(0)}</instance>
+ </data-out>
+ </output-events>
+ <action>
+ <workflow>
+ <app-path>#WF_PATH#</app-path>
+ <configuration/>
+ </workflow>
+ </action>
+</coordinator-app>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/resources/workflow/falcon-table-export.hql
----------------------------------------------------------------------
diff --git a/oozie/src/main/resources/workflow/falcon-table-export.hql b/oozie/src/main/resources/workflow/falcon-table-export.hql
new file mode 100644
index 0000000..37fd1b7
--- /dev/null
+++ b/oozie/src/main/resources/workflow/falcon-table-export.hql
@@ -0,0 +1,18 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+export table ${falconSourceDatabase}.${falconSourceTable} partition ${falconSourcePartition} to '${falconSourceStagingDir}';
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/resources/workflow/falcon-table-import.hql
----------------------------------------------------------------------
diff --git a/oozie/src/main/resources/workflow/falcon-table-import.hql b/oozie/src/main/resources/workflow/falcon-table-import.hql
new file mode 100644
index 0000000..653d580
--- /dev/null
+++ b/oozie/src/main/resources/workflow/falcon-table-import.hql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+use ${falconTargetDatabase};
+alter table ${falconTargetTable} drop if exists partition ${falconTargetPartition};
+import table ${falconTargetTable} partition ${falconTargetPartition} from '${falconTargetStagingDir}';
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/resources/workflow/process-parent-workflow.xml
----------------------------------------------------------------------
diff --git a/oozie/src/main/resources/workflow/process-parent-workflow.xml b/oozie/src/main/resources/workflow/process-parent-workflow.xml
new file mode 100644
index 0000000..4a2495c
--- /dev/null
+++ b/oozie/src/main/resources/workflow/process-parent-workflow.xml
@@ -0,0 +1,278 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-process-parent-workflow'>
+ <start to='should-record'/>
+ <decision name='should-record'>
+ <switch>
+ <case to="recordsize">
+ ${shouldRecord=="true"}
+ </case>
+ <default to="user-workflow"/>
+ </switch>
+ </decision>
+ <action name='recordsize'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <!-- HCatalog jars -->
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>hcatalog</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.latedata.LateDataHandler</main-class>
+ <arg>-out</arg>
+ <arg>${logDir}/latedata/${nominalTime}</arg>
+ <arg>-paths</arg>
+ <arg>${falconInPaths}</arg>
+ <arg>-falconInputFeeds</arg>
+ <arg>${falconInputFeeds}</arg>
+ <arg>-falconInputFeedStorageTypes</arg>
+ <arg>${falconInputFeedStorageTypes}</arg>
+ <capture-output/>
+ </java>
+ <ok to="user-workflow"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <decision name='user-workflow'>
+ <switch>
+ <case to="user-oozie-workflow">
+ ${userWorkflowEngine=="oozie"}
+ </case>
+ <case to="user-pig-job">
+ ${userWorkflowEngine=="pig"}
+ </case>
+ <case to="user-hive-job">
+ ${userWorkflowEngine=="hive"}
+ </case>
+ <default to="user-oozie-workflow"/>
+ </switch>
+ </decision>
+ <action name='user-pig-job'>
+ <pig>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.pig</name>
+ <value>pig,hcatalog</value>
+ </property>
+ </configuration>
+ <script>#USER_WF_PATH#</script>
+ </pig>
+ <ok to="succeeded-post-processing"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <action name="user-hive-job">
+ <hive xmlns="uri:oozie:hive-action:0.2">
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <job-xml>${wf:appPath()}/conf/hive-site.xml</job-xml>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <script>#USER_WF_PATH#</script>
+ </hive>
+ <ok to="succeeded-post-processing"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <action name='user-oozie-workflow'>
+ <sub-workflow>
+ <app-path>#USER_WF_PATH#</app-path>
+ <propagate-configuration/>
+ </sub-workflow>
+ <ok to="succeeded-post-processing"/>
+ <error to="failed-post-processing"/>
+ </action>
+ <action name='succeeded-post-processing'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
+ <arg>-cluster</arg>
+ <arg>${cluster}</arg>
+ <arg>-entityType</arg>
+ <arg>${entityType}</arg>
+ <arg>-entityName</arg>
+ <arg>${entityName}</arg>
+ <arg>-nominalTime</arg>
+ <arg>${nominalTime}</arg>
+ <arg>-operation</arg>
+ <arg>GENERATE</arg>
+ <arg>-workflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-runId</arg>
+ <arg>${wf:run()}</arg>
+ <arg>-status</arg>
+ <arg>SUCCEEDED</arg>
+ <arg>-timeStamp</arg>
+ <arg>${timeStamp}</arg>
+ <arg>-brokerImplClass</arg>
+ <arg>${wf:conf("broker.impl.class")}</arg>
+ <arg>-brokerUrl</arg>
+ <arg>${wf:conf("broker.url")}</arg>
+ <arg>-userBrokerImplClass</arg>
+ <arg>${userBrokerImplClass}</arg>
+ <arg>-userBrokerUrl</arg>
+ <arg>${userBrokerUrl}</arg>
+ <arg>-brokerTTL</arg>
+ <arg>${wf:conf("broker.ttlInMins")}</arg>
+ <arg>-feedNames</arg>
+ <arg>${feedNames}</arg>
+ <arg>-feedInstancePaths</arg>
+ <arg>${feedInstancePaths}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
+ <arg>-workflowEngineUrl</arg>
+ <arg>${workflowEngineUrl}</arg>
+ <arg>-subflowId</arg>
+ <arg>${wf:id()}${userWorkflowEngine == "oozie" ? "@user-oozie-workflow" : ""}</arg>
+ <arg>-userWorkflowEngine</arg>
+ <arg>${userWorkflowEngine}</arg>
+ <arg>-userWorkflowName</arg>
+ <arg>${userWorkflowName}</arg>
+ <arg>-userWorkflowVersion</arg>
+ <arg>${userWorkflowVersion}</arg>
+ <arg>-logDir</arg>
+ <arg>${logDir}/job-${nominalTime}/</arg>
+ <arg>-workflowUser</arg>
+ <arg>${wf:user()}</arg>
+ <arg>-falconInputFeeds</arg>
+ <arg>${falconInputFeeds}</arg>
+ <arg>-falconInputPaths</arg>
+ <arg>${falconInPaths}</arg>
+ <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
+ <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
+ <file>${wf:conf("falcon.libpath")}/jms.jar</file>
+ <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
+ <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
+ <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
+ </java>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <action name='failed-post-processing'>
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
+ <arg>-cluster</arg>
+ <arg>${cluster}</arg>
+ <arg>-entityType</arg>
+ <arg>${entityType}</arg>
+ <arg>-entityName</arg>
+ <arg>${entityName}</arg>
+ <arg>-nominalTime</arg>
+ <arg>${nominalTime}</arg>
+ <arg>-operation</arg>
+ <arg>GENERATE</arg>
+ <arg>-workflowId</arg>
+ <arg>${wf:id()}</arg>
+ <arg>-runId</arg>
+ <arg>${wf:run()}</arg>
+ <arg>-status</arg>
+ <arg>FAILED</arg>
+ <arg>-timeStamp</arg>
+ <arg>${timeStamp}</arg>
+ <arg>-brokerImplClass</arg>
+ <arg>${wf:conf("broker.impl.class")}</arg>
+ <arg>-brokerUrl</arg>
+ <arg>${wf:conf("broker.url")}</arg>
+ <arg>-userBrokerImplClass</arg>
+ <arg>${userBrokerImplClass}</arg>
+ <arg>-userBrokerUrl</arg>
+ <arg>${userBrokerUrl}</arg>
+ <arg>-brokerTTL</arg>
+ <arg>${wf:conf("broker.ttlInMins")}</arg>
+ <arg>-feedNames</arg>
+ <arg>${feedNames}</arg>
+ <arg>-feedInstancePaths</arg>
+ <arg>${feedInstancePaths}</arg>
+ <arg>-logFile</arg>
+ <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
+ <arg>-workflowEngineUrl</arg>
+ <arg>${workflowEngineUrl}</arg>
+ <arg>-subflowId</arg>
+ <arg>${wf:id()}${userWorkflowEngine == "oozie" ? "@user-oozie-workflow" : ""}</arg>
+ <arg>-userWorkflowEngine</arg>
+ <arg>${userWorkflowEngine}</arg>
+ <arg>-logDir</arg>
+ <arg>${logDir}/job-${nominalTime}/</arg>
+ <arg>-workflowUser</arg>
+ <arg>${wf:user()}</arg>
+ <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
+ <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
+ <file>${wf:conf("falcon.libpath")}/jms.jar</file>
+ <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
+ <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
+ <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
+ </java>
+ <ok to="fail"/>
+ <error to="fail"/>
+ </action>
+ <kill name="fail">
+ <message>
+ Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+ </message>
+ </kill>
+ <end name='end'/>
+</workflow-app>
[2/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/java/org/apache/falcon/converter/OozieProcessWorkflowBuilderTest.java
----------------------------------------------------------------------
diff --git a/process/src/test/java/org/apache/falcon/converter/OozieProcessWorkflowBuilderTest.java b/process/src/test/java/org/apache/falcon/converter/OozieProcessWorkflowBuilderTest.java
deleted file mode 100644
index 1eeadaf..0000000
--- a/process/src/test/java/org/apache/falcon/converter/OozieProcessWorkflowBuilderTest.java
+++ /dev/null
@@ -1,799 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.falcon.converter;
-
-import org.apache.falcon.FalconException;
-import org.apache.falcon.Tag;
-import org.apache.falcon.cluster.util.EmbeddedCluster;
-import org.apache.falcon.entity.CatalogStorage;
-import org.apache.falcon.entity.ClusterHelper;
-import org.apache.falcon.entity.EntityUtil;
-import org.apache.falcon.entity.FeedHelper;
-import org.apache.falcon.entity.ProcessHelper;
-import org.apache.falcon.entity.Storage;
-import org.apache.falcon.entity.store.ConfigurationStore;
-import org.apache.falcon.entity.v0.EntityType;
-import org.apache.falcon.entity.v0.Frequency;
-import org.apache.falcon.entity.v0.SchemaHelper;
-import org.apache.falcon.entity.v0.cluster.Cluster;
-import org.apache.falcon.entity.v0.cluster.Interfacetype;
-import org.apache.falcon.entity.v0.feed.Feed;
-import org.apache.falcon.entity.v0.feed.LocationType;
-import org.apache.falcon.entity.v0.process.Input;
-import org.apache.falcon.entity.v0.process.Output;
-import org.apache.falcon.entity.v0.process.Process;
-import org.apache.falcon.entity.v0.process.Validity;
-import org.apache.falcon.entity.v0.process.Workflow;
-import org.apache.falcon.messaging.EntityInstanceMessage;
-import org.apache.falcon.oozie.bundle.BUNDLEAPP;
-import org.apache.falcon.oozie.coordinator.CONFIGURATION.Property;
-import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
-import org.apache.falcon.oozie.coordinator.SYNCDATASET;
-import org.apache.falcon.oozie.workflow.ACTION;
-import org.apache.falcon.oozie.workflow.DECISION;
-import org.apache.falcon.oozie.workflow.PIG;
-import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
-import org.apache.falcon.security.CurrentUser;
-import org.apache.falcon.security.SecurityUtil;
-import org.apache.falcon.util.OozieUtils;
-import org.apache.falcon.util.StartupProperties;
-import org.apache.falcon.workflow.OozieProcessWorkflowBuilder;
-import org.apache.falcon.workflow.OozieWorkflowBuilder;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.testng.Assert;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.Unmarshaller;
-import javax.xml.transform.stream.StreamSource;
-import javax.xml.validation.Schema;
-import javax.xml.validation.SchemaFactory;
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.URL;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-
-/**
- * Test for the Falcon entities mapping into Oozie artifacts.
- */
-public class OozieProcessWorkflowBuilderTest extends AbstractTestBase {
-
- private String hdfsUrl;
- private FileSystem fs;
- private Cluster cluster;
-
- @BeforeClass
- public void setUpDFS() throws Exception {
- CurrentUser.authenticate("falcon");
-
- Configuration conf = EmbeddedCluster.newCluster("testCluster").getConf();
- hdfsUrl = conf.get("fs.default.name");
- }
-
- @BeforeMethod
- public void setUp() throws Exception {
- super.setup();
-
- ConfigurationStore store = ConfigurationStore.get();
- cluster = store.get(EntityType.CLUSTER, "corp");
- org.apache.falcon.entity.v0.cluster.Property property =
- new org.apache.falcon.entity.v0.cluster.Property();
- property.setName(OozieWorkflowBuilder.METASTORE_KERBEROS_PRINCIPAL);
- property.setValue("hive/_HOST");
- cluster.getProperties().getProperties().add(property);
-
- ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(hdfsUrl);
- ClusterHelper.getInterface(cluster, Interfacetype.REGISTRY).setEndpoint("thrift://localhost:49083");
- fs = new Path(hdfsUrl).getFileSystem(EmbeddedCluster.newConfiguration());
- fs.create(new Path(ClusterHelper.getLocation(cluster, "working"), "libext/PROCESS/ext.jar")).close();
-
- Process process = store.get(EntityType.PROCESS, "clicksummary");
- Path wfpath = new Path(process.getWorkflow().getPath());
- assert new Path(hdfsUrl).getFileSystem(EmbeddedCluster.newConfiguration()).mkdirs(wfpath);
- }
-
- public void testDefCoordMap(Process process, COORDINATORAPP coord) throws Exception {
- assertEquals("FALCON_PROCESS_DEFAULT_" + process.getName(), coord.getName());
- Validity processValidity = process.getClusters().getClusters().get(0).getValidity();
- assertEquals(SchemaHelper.formatDateUTC(processValidity.getStart()), coord.getStart());
- assertEquals(SchemaHelper.formatDateUTC(processValidity.getEnd()), coord.getEnd());
- assertEquals("${coord:" + process.getFrequency().toString() + "}", coord.getFrequency());
- assertEquals(process.getTimezone().getID(), coord.getTimezone());
-
- assertEquals(process.getParallel() + "", coord.getControls().getConcurrency());
- assertEquals(process.getOrder().name(), coord.getControls().getExecution());
-
- assertEquals(process.getInputs().getInputs().get(0).getName(),
- coord.getInputEvents().getDataIn().get(0).getName());
- assertEquals(process.getInputs().getInputs().get(0).getName(),
- coord.getInputEvents().getDataIn().get(0).getDataset());
- assertEquals("${" + process.getInputs().getInputs().get(0).getStart() + "}",
- coord.getInputEvents().getDataIn().get(0).getStartInstance());
- assertEquals("${" + process.getInputs().getInputs().get(0).getEnd() + "}",
- coord.getInputEvents().getDataIn().get(0).getEndInstance());
-
- assertEquals(process.getInputs().getInputs().get(1).getName(),
- coord.getInputEvents().getDataIn().get(1).getName());
- assertEquals(process.getInputs().getInputs().get(1).getName(),
- coord.getInputEvents().getDataIn().get(1).getDataset());
- assertEquals("${" + process.getInputs().getInputs().get(1).getStart() + "}",
- coord.getInputEvents().getDataIn().get(1).getStartInstance());
- assertEquals("${" + process.getInputs().getInputs().get(1).getEnd() + "}",
- coord.getInputEvents().getDataIn().get(1).getEndInstance());
-
- assertEquals(process.getOutputs().getOutputs().get(0).getName() + "stats",
- coord.getOutputEvents().getDataOut().get(1).getName());
- assertEquals(process.getOutputs().getOutputs().get(0).getName() + "meta",
- coord.getOutputEvents().getDataOut().get(2).getName());
- assertEquals(process.getOutputs().getOutputs().get(0).getName() + "tmp",
- coord.getOutputEvents().getDataOut().get(3).getName());
-
- assertEquals(process.getOutputs().getOutputs().get(0).getName(),
- coord.getOutputEvents().getDataOut().get(0).getName());
- assertEquals("${" + process.getOutputs().getOutputs().get(0).getInstance() + "}",
- coord.getOutputEvents().getDataOut().get(0).getInstance());
- assertEquals(process.getOutputs().getOutputs().get(0).getName(),
- coord.getOutputEvents().getDataOut().get(0).getDataset());
-
- assertEquals(6, coord.getDatasets().getDatasetOrAsyncDataset().size());
-
- ConfigurationStore store = ConfigurationStore.get();
- Feed feed = store.get(EntityType.FEED, process.getInputs().getInputs().get(0).getFeed());
- SYNCDATASET ds = (SYNCDATASET) coord.getDatasets().getDatasetOrAsyncDataset().get(0);
-
- final org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0);
- assertEquals(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()), ds.getInitialInstance());
- assertEquals(feed.getTimezone().getID(), ds.getTimezone());
- assertEquals("${coord:" + feed.getFrequency().toString() + "}", ds.getFrequency());
- assertEquals("", ds.getDoneFlag());
- assertEquals(ds.getUriTemplate(),
- FeedHelper.createStorage(feedCluster, feed).getUriTemplate(LocationType.DATA));
-
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
- assertEquals(props.get("mapred.job.priority"), "LOW");
- Assert.assertEquals(props.get("logDir"), getLogPath(process));
-
- assertLibExtensions(coord);
- }
-
- private String getLogPath(Process process) {
- Path logPath = EntityUtil.getLogPath(cluster, process);
- return (logPath.toUri().getScheme() == null ? "${nameNode}" : "") + logPath;
- }
-
- @Test
- public void testBundle() throws Exception {
- String path = StartupProperties.get().getProperty("system.lib.location");
- if (!new File(path).exists()) {
- Assert.assertTrue(new File(path).mkdirs());
- }
- Process process = ConfigurationStore.get().get(EntityType.PROCESS, "clicksummary");
-
- WORKFLOWAPP parentWorkflow = initializeProcessMapper(process, "12", "360");
- testParentWorkflow(process, parentWorkflow);
- }
-
- @Test
- public void testBundle1() throws Exception {
- Process process = ConfigurationStore.get().get(EntityType.PROCESS, "clicksummary");
- process.setFrequency(Frequency.fromString("minutes(1)"));
- process.setTimeout(Frequency.fromString("minutes(15)"));
-
- WORKFLOWAPP parentWorkflow = initializeProcessMapper(process, "30", "15");
- testParentWorkflow(process, parentWorkflow);
- }
-
- @Test
- public void testPigProcessMapper() throws Exception {
- Process process = ConfigurationStore.get().get(EntityType.PROCESS, "pig-process");
- Assert.assertEquals("pig", process.getWorkflow().getEngine().value());
-
- prepare(process);
- WORKFLOWAPP parentWorkflow = initializeProcessMapper(process, "12", "360");
- testParentWorkflow(process, parentWorkflow);
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
-
- ACTION pigActionNode = (ACTION) decisionOrForkOrJoin.get(3);
- Assert.assertEquals("user-pig-job", pigActionNode.getName());
-
- final PIG pigAction = pigActionNode.getPig();
- Assert.assertEquals(pigAction.getScript(), "${nameNode}/falcon/staging/workflows/pig-process/user/id.pig");
- Assert.assertNotNull(pigAction.getPrepare());
- Assert.assertEquals(1, pigAction.getPrepare().getDelete().size());
- Assert.assertFalse(pigAction.getParam().isEmpty());
- Assert.assertEquals(5, pigAction.getParam().size());
- Assert.assertEquals(Collections.EMPTY_LIST, pigAction.getArchive());
- Assert.assertTrue(pigAction.getFile().size() > 0);
-
- ACTION oozieAction = (ACTION) decisionOrForkOrJoin.get(5);
- Assert.assertEquals("user-oozie-workflow", oozieAction.getName());
- Assert.assertEquals("#USER_WF_PATH#", oozieAction.getSubWorkflow().getAppPath());
- }
-
- @DataProvider(name = "secureOptions")
- private Object[][] createOptions() {
- return new Object[][] {
- {"simple"},
- {"kerberos"},
- };
- }
-
- @Test (dataProvider = "secureOptions")
- public void testHiveProcessMapper(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- URL resource = this.getClass().getResource("/config/feed/hive-table-feed.xml");
- Feed inFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.FEED, inFeed);
-
- resource = this.getClass().getResource("/config/feed/hive-table-feed-out.xml");
- Feed outFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.FEED, outFeed);
-
- resource = this.getClass().getResource("/config/process/hive-process.xml");
- Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.PROCESS, process);
-
- prepare(process);
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
- Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- // verify table and hive props
- Map<String, String> expected = getExpectedProperties(inFeed, outFeed, process);
- expected.putAll(ClusterHelper.geHiveProperties(cluster));
- for (Map.Entry<String, String> entry : props.entrySet()) {
- if (expected.containsKey(entry.getKey())) {
- Assert.assertEquals(entry.getValue(), expected.get(entry.getKey()));
- }
- }
- Assert.assertEquals(props.get("logDir"), getLogPath(process));
-
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
- testParentWorkflow(process, parentWorkflow);
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
-
- ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
- Assert.assertEquals("user-hive-job", hiveNode.getName());
-
- JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
- org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
-
- Assert.assertEquals(hiveAction.getScript(),
- "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
- Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
- Assert.assertNull(hiveAction.getPrepare());
- Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
- Assert.assertFalse(hiveAction.getParam().isEmpty());
- Assert.assertEquals(11, hiveAction.getParam().size());
-
- Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
- assertHCatCredentials(parentWorkflow, wfPath);
-
- ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
- }
-
- @Test (dataProvider = "secureOptions")
- public void testHiveProcessMapperWithFSInputFeedAndTableOutputFeed(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- URL resource = this.getClass().getResource("/config/feed/hive-table-feed-out.xml");
- Feed outFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.FEED, outFeed);
-
- resource = this.getClass().getResource("/config/process/hive-process-FSInputFeed.xml");
- Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.PROCESS, process);
-
- prepare(process);
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
- Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
- Assert.assertEquals(props.get("logDir"), getLogPath(process));
-
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
- testParentWorkflow(process, parentWorkflow);
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
-
- ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
- Assert.assertEquals("user-hive-job", hiveNode.getName());
-
- JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
- org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
-
- Assert.assertEquals(hiveAction.getScript(),
- "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
- Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
- Assert.assertNull(hiveAction.getPrepare());
- Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
- Assert.assertFalse(hiveAction.getParam().isEmpty());
- Assert.assertEquals(7, hiveAction.getParam().size());
-
- Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
- assertHCatCredentials(parentWorkflow, wfPath);
-
- ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
- }
-
- @Test (dataProvider = "secureOptions")
- public void testHiveProcessMapperWithTableInputFeedAndFSOutputFeed(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- URL resource = this.getClass().getResource("/config/feed/hive-table-feed.xml");
- Feed inFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.FEED, inFeed);
-
- resource = this.getClass().getResource("/config/process/hive-process-FSOutputFeed.xml");
- Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.PROCESS, process);
-
- prepare(process);
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
- Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
- Assert.assertEquals(props.get("logDir"), getLogPath(process));
-
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
- testParentWorkflow(process, parentWorkflow);
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
-
- ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
- Assert.assertEquals("user-hive-job", hiveNode.getName());
-
- JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
- org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
-
- Assert.assertEquals(hiveAction.getScript(),
- "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
- Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
- Assert.assertNotNull(hiveAction.getPrepare());
- Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
- Assert.assertFalse(hiveAction.getParam().isEmpty());
- Assert.assertEquals(6, hiveAction.getParam().size());
-
- Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
- assertHCatCredentials(parentWorkflow, wfPath);
-
- ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
- }
-
- @Test (dataProvider = "secureOptions")
- public void testHiveProcessWithNoInputsAndOutputs(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- URL resource = this.getClass().getResource("/config/process/dumb-hive-process.xml");
- Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.PROCESS, process);
-
- prepare(process);
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
- Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
- Assert.assertEquals(props.get("logDir"), getLogPath(process));
-
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
- testParentWorkflow(process, parentWorkflow);
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
-
- ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
- Assert.assertEquals("user-hive-job", hiveNode.getName());
-
- JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
- org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
-
- Assert.assertEquals(hiveAction.getScript(),
- "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
- Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
- Assert.assertNull(hiveAction.getPrepare());
- Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
- Assert.assertTrue(hiveAction.getParam().isEmpty());
-
- assertHCatCredentials(parentWorkflow, wfPath);
-
- ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
- }
-
- private void assertHCatCredentials(WORKFLOWAPP wf, String wfPath) throws IOException {
- Path hiveConfPath = new Path(wfPath, "conf/hive-site.xml");
- Assert.assertTrue(fs.exists(hiveConfPath));
-
- if (SecurityUtil.isSecurityEnabled()) {
- Assert.assertNotNull(wf.getCredentials());
- Assert.assertEquals(1, wf.getCredentials().getCredential().size());
- }
-
- List<Object> actions = wf.getDecisionOrForkOrJoin();
- for (Object obj : actions) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
-
- ACTION action = (ACTION) obj;
-
- if (!SecurityUtil.isSecurityEnabled()) {
- Assert.assertNull(action.getCred());
- return;
- }
-
- String actionName = action.getName();
- if ("user-hive-job".equals(actionName) || "user-pig-job".equals(actionName)
- || "user-oozie-workflow".equals(actionName) || "recordsize".equals(actionName)) {
- Assert.assertNotNull(action.getCred());
- Assert.assertEquals(action.getCred(), "falconHiveAuth");
- }
- }
- }
-
- private void prepare(Process process) throws IOException {
- Path wf = new Path(process.getWorkflow().getPath());
- fs.mkdirs(wf.getParent());
- fs.create(wf).close();
- }
-
- @Test (dataProvider = "secureOptions")
- public void testProcessMapperForTableStorage(String secureOption) throws Exception {
- StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
-
- URL resource = this.getClass().getResource("/config/feed/hive-table-feed.xml");
- Feed inFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.FEED, inFeed);
-
- resource = this.getClass().getResource("/config/feed/hive-table-feed-out.xml");
- Feed outFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.FEED, outFeed);
-
- resource = this.getClass().getResource("/config/process/pig-process-table.xml");
- Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.PROCESS, process);
-
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
- Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
-
- // verify table props
- Map<String, String> expected = getExpectedProperties(inFeed, outFeed, process);
- for (Map.Entry<String, String> entry : props.entrySet()) {
- if (expected.containsKey(entry.getKey())) {
- Assert.assertEquals(entry.getValue(), expected.get(entry.getKey()));
- }
- }
- Assert.assertEquals(props.get("logDir"), getLogPath(process));
-
- // verify the late data params
- Assert.assertEquals(props.get("falconInputFeeds"), process.getInputs().getInputs().get(0).getFeed());
- Assert.assertEquals(props.get("falconInPaths"), "${coord:dataIn('input')}");
- Assert.assertEquals(props.get("falconInputFeedStorageTypes"), Storage.TYPE.TABLE.name());
-
- // verify the post processing params
- Assert.assertEquals(props.get("feedNames"), process.getOutputs().getOutputs().get(0).getFeed());
- Assert.assertEquals(props.get("feedInstancePaths"), "${coord:dataOut('output')}");
-
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
-
- Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
- assertHCatCredentials(parentWorkflow, wfPath);
- }
-
- private Map<String, String> getExpectedProperties(Feed inFeed, Feed outFeed,
- Process process) throws FalconException {
- Map<String, String> expected = new HashMap<String, String>();
- for (Input input : process.getInputs().getInputs()) {
- CatalogStorage storage = (CatalogStorage) FeedHelper.createStorage(cluster, inFeed);
- propagateStorageProperties(input.getName(), storage, expected);
- }
-
- for (Output output : process.getOutputs().getOutputs()) {
- CatalogStorage storage = (CatalogStorage) FeedHelper.createStorage(cluster, outFeed);
- propagateStorageProperties(output.getName(), storage, expected);
- }
-
- return expected;
- }
-
- private void propagateStorageProperties(String feedName, CatalogStorage tableStorage,
- Map<String, String> props) {
- String prefix = "falcon_" + feedName;
- props.put(prefix + "_storage_type", tableStorage.getType().name());
- props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
- props.put(prefix + "_database", tableStorage.getDatabase());
- props.put(prefix + "_table", tableStorage.getTable());
-
- if (prefix.equals("falcon_input")) {
- props.put(prefix + "_partition_filter_pig", "${coord:dataInPartitionFilter('input', 'pig')}");
- props.put(prefix + "_partition_filter_hive", "${coord:dataInPartitionFilter('input', 'hive')}");
- props.put(prefix + "_partition_filter_java", "${coord:dataInPartitionFilter('input', 'java')}");
- props.put(prefix + "_datain_partitions_hive", "${coord:dataInPartitions('input', 'hive-export')}");
- } else if (prefix.equals("falcon_output")) {
- props.put(prefix + "_dataout_partitions", "${coord:dataOutPartitions('output')}");
- }
- }
-
- @Test
- public void testProcessWorkflowMapper() throws Exception {
- Process process = ConfigurationStore.get().get(EntityType.PROCESS, "clicksummary");
- Workflow processWorkflow = process.getWorkflow();
- Assert.assertEquals("test", processWorkflow.getName());
- Assert.assertEquals("1.0.0", processWorkflow.getVersion());
- }
-
- @SuppressWarnings("unchecked")
- private void assertLibExtensions(COORDINATORAPP coord) throws Exception {
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- JAXBContext jaxbContext = JAXBContext.newInstance(WORKFLOWAPP.class);
- WORKFLOWAPP wf = ((JAXBElement<WORKFLOWAPP>) jaxbContext.createUnmarshaller().unmarshal(
- fs.open(new Path(wfPath, "workflow.xml")))).getValue();
- List<Object> actions = wf.getDecisionOrForkOrJoin();
- for (Object obj : actions) {
- if (!(obj instanceof ACTION)) {
- continue;
- }
- ACTION action = (ACTION) obj;
- List<String> files = null;
- if (action.getJava() != null) {
- files = action.getJava().getFile();
- } else if (action.getPig() != null) {
- files = action.getPig().getFile();
- } else if (action.getMapReduce() != null) {
- files = action.getMapReduce().getFile();
- }
- if (files != null) {
- Assert.assertTrue(files.get(files.size() - 1)
- .endsWith("/projects/falcon/working/libext/PROCESS/ext.jar"));
- }
- }
- }
-
- private WORKFLOWAPP initializeProcessMapper(Process process, String throttle, String timeout)
- throws Exception {
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(process);
- Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- testDefCoordMap(process, coord);
- assertEquals(coord.getControls().getThrottle(), throttle);
- assertEquals(coord.getControls().getTimeout(), timeout);
-
- String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
- return getParentWorkflow(new Path(wfPath));
- }
-
- public void testParentWorkflow(Process process, WORKFLOWAPP parentWorkflow) {
- Assert.assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(), parentWorkflow.getName());
-
- List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
- Assert.assertEquals("should-record", ((DECISION) decisionOrForkOrJoin.get(0)).getName());
- Assert.assertEquals("recordsize", ((ACTION) decisionOrForkOrJoin.get(1)).getName());
- Assert.assertEquals("user-workflow", ((DECISION) decisionOrForkOrJoin.get(2)).getName());
- Assert.assertEquals("user-pig-job", ((ACTION) decisionOrForkOrJoin.get(3)).getName());
- Assert.assertEquals("user-hive-job", ((ACTION) decisionOrForkOrJoin.get(4)).getName());
- Assert.assertEquals("user-oozie-workflow", ((ACTION) decisionOrForkOrJoin.get(5)).getName());
- Assert.assertEquals("succeeded-post-processing", ((ACTION) decisionOrForkOrJoin.get(6)).getName());
- Assert.assertEquals("failed-post-processing", ((ACTION) decisionOrForkOrJoin.get(7)).getName());
- Assert.assertEquals("3", ((ACTION) decisionOrForkOrJoin.get(1)).getRetryMax());
- Assert.assertEquals("1", ((ACTION) decisionOrForkOrJoin.get(1)).getRetryInterval());
- Assert.assertEquals("3", ((ACTION) decisionOrForkOrJoin.get(6)).getRetryMax());
- Assert.assertEquals("1", ((ACTION) decisionOrForkOrJoin.get(6)).getRetryInterval());
- Assert.assertEquals("3", ((ACTION) decisionOrForkOrJoin.get(7)).getRetryMax());
- Assert.assertEquals("1", ((ACTION) decisionOrForkOrJoin.get(7)).getRetryInterval());
- }
-
- private COORDINATORAPP getCoordinator(Path path) throws Exception {
- String bundleStr = readFile(path);
-
- Unmarshaller unmarshaller = JAXBContext.newInstance(COORDINATORAPP.class).createUnmarshaller();
- SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
- Schema schema = schemaFactory.newSchema(this.getClass().getResource("/oozie-coordinator-0.3.xsd"));
- unmarshaller.setSchema(schema);
- JAXBElement<COORDINATORAPP> jaxbBundle = unmarshaller.unmarshal(
- new StreamSource(new ByteArrayInputStream(bundleStr.trim().getBytes())), COORDINATORAPP.class);
- return jaxbBundle.getValue();
- }
-
- @SuppressWarnings("unchecked")
- private WORKFLOWAPP getParentWorkflow(Path path) throws Exception {
- String workflow = readFile(new Path(path, "workflow.xml"));
-
- JAXBContext wfAppContext = JAXBContext.newInstance(WORKFLOWAPP.class);
- Unmarshaller unmarshaller = wfAppContext.createUnmarshaller();
- return ((JAXBElement<WORKFLOWAPP>) unmarshaller.unmarshal(
- new StreamSource(new ByteArrayInputStream(workflow.trim().getBytes())))).getValue();
- }
-
- private BUNDLEAPP getBundle(Path path) throws Exception {
- String bundleStr = readFile(new Path(path, "bundle.xml"));
-
- Unmarshaller unmarshaller = JAXBContext.newInstance(BUNDLEAPP.class).createUnmarshaller();
- SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
- Schema schema = schemaFactory.newSchema(this.getClass().getResource("/oozie-bundle-0.1.xsd"));
- unmarshaller.setSchema(schema);
- JAXBElement<BUNDLEAPP> jaxbBundle = unmarshaller.unmarshal(
- new StreamSource(new ByteArrayInputStream(bundleStr.trim().getBytes())), BUNDLEAPP.class);
- return jaxbBundle.getValue();
- }
-
- private String readFile(Path path) throws Exception {
- BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)));
- String line;
- StringBuilder contents = new StringBuilder();
- while ((line = reader.readLine()) != null) {
- contents.append(line);
- }
- return contents.toString();
- }
-
- @Override
- @AfterMethod
- public void cleanup() throws Exception {
- super.cleanup();
- ConfigurationStore.get().remove(EntityType.PROCESS, "table-process");
- ConfigurationStore.get().remove(EntityType.FEED, "clicks-raw-table");
- ConfigurationStore.get().remove(EntityType.FEED, "clicks-summary-table");
- ConfigurationStore.get().remove(EntityType.PROCESS, "dumb-process");
- }
-
- @Test
- public void testProcessWithNoInputsAndOutputs() throws Exception {
- ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(hdfsUrl);
-
- URL resource = this.getClass().getResource("/config/process/dumb-process.xml");
- Process processEntity = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
- ConfigurationStore.get().publish(EntityType.PROCESS, processEntity);
-
- OozieProcessWorkflowBuilder builder = new OozieProcessWorkflowBuilder(processEntity);
- Path bundlePath = new Path("/falcon/staging/workflows", processEntity.getName());
- builder.map(cluster, bundlePath);
- assertTrue(fs.exists(bundlePath));
-
- BUNDLEAPP bundle = getBundle(bundlePath);
- assertEquals(EntityUtil.getWorkflowName(processEntity).toString(), bundle.getName());
- assertEquals(1, bundle.getCoordinator().size());
- assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, processEntity).toString(),
- bundle.getCoordinator().get(0).getName());
- String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
-
- COORDINATORAPP coord = getCoordinator(new Path(coordPath));
- HashMap<String, String> props = new HashMap<String, String>();
- for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
- props.put(prop.getName(), prop.getValue());
- }
- Assert.assertEquals(props.get("logDir"), getLogPath(processEntity));
-
- String[] expected = {
- EntityInstanceMessage.ARG.feedNames.getPropName(),
- EntityInstanceMessage.ARG.feedInstancePaths.getPropName(),
- "falconInputFeeds",
- "falconInPaths",
- "userWorkflowName",
- "userWorkflowVersion",
- "userWorkflowEngine",
- };
-
- for (String property : expected) {
- Assert.assertTrue(props.containsKey(property), "expected property missing: " + property);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/cluster/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/cluster/cluster-0.1.xml b/process/src/test/resources/config/cluster/cluster-0.1.xml
deleted file mode 100644
index 032cc77..0000000
--- a/process/src/test/resources/config/cluster/cluster-0.1.xml
+++ /dev/null
@@ -1,44 +0,0 @@
-<?xml version="1.0"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-
-<cluster colo="gs" description="" name="corp" xmlns="uri:falcon:cluster:0.1"
- >
- <interfaces>
- <interface type="readonly" endpoint="hftp://localhost:50010"
- version="0.20.2"/>
- <interface type="write" endpoint="hdfs://localhost:8020"
- version="0.20.2"/>
- <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
- <interface type="workflow" endpoint="http://localhost:11000/oozie/"
- version="3.1"/>
- <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
- version="5.1.6"/>
- <interface type="registry" endpoint="Hcat" version="1"/>
- </interfaces>
- <locations>
- <location name="staging" path="/projects/falcon/staging"/>
- <location name="temp" path="/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
- </locations>
- <properties>
- <property name="field1" value="value1"/>
- <property name="field2" value="value2"/>
- <property name="hive.metastore.client.socket.timeout" value="20"/>
- </properties>
-</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/feed/feed-0.1.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/feed/feed-0.1.xml b/process/src/test/resources/config/feed/feed-0.1.xml
deleted file mode 100644
index fb9b707..0000000
--- a/process/src/test/resources/config/feed/feed-0.1.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<feed description="clicks log" name="clicks" xmlns="uri:falcon:feed:0.1"
- >
- <partitions>
- <partition name="fraud"/>
- <partition name="country"/>
- </partitions>
-
- <groups>online,bi</groups>
-
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="hours(6)"/>
-
- <clusters>
- <cluster name="corp" type="source">
- <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
- <retention limit="hours(6)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- <locations>
- <location type="data" path="/projects/falcon/clicks/${YY}/${MM}"/>
- <location type="stats" path="/projects/falcon/clicksStats"/>
- <location type="meta" path="/projects/falcon/clicksMetaData"/>
- </locations>
- </cluster>
- <cluster name="backupCluster" type="target">
- <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
- <retention limit="hours(6)" action="archive"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <locations>
- <location type="data" path="/projects/falcon/clicks"/>
- <location type="stats" path="/projects/falcon/clicksStats"/>
- <location type="meta" path="/projects/falcon/clicksMetaData"/>
- </locations>
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-
- <properties>
- <property name="field1" value="value1"/>
- <property name="field2" value="value2"/>
- </properties>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/feed/hive-table-feed-out.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/feed/hive-table-feed-out.xml b/process/src/test/resources/config/feed/hive-table-feed-out.xml
deleted file mode 100644
index bd93a01..0000000
--- a/process/src/test/resources/config/feed/hive-table-feed-out.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<feed description="clicks summary table " name="clicks-summary-table" xmlns="uri:falcon:feed:0.1">
- <groups>online,bi</groups>
-
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="hours(6)"/>
-
- <clusters>
- <cluster name="corp" type="source" partition="*/${cluster.colo}">
- <validity start="2021-11-01T00:00Z" end="2021-12-31T00:00Z"/>
- <retention limit="hours(48)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- <cluster name="backupCluster" type="target">
- <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
- <retention limit="hours(6)" action="archive"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <table uri="catalog:default:clicks-summary#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/feed/hive-table-feed.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/feed/hive-table-feed.xml b/process/src/test/resources/config/feed/hive-table-feed.xml
deleted file mode 100644
index 66d0742..0000000
--- a/process/src/test/resources/config/feed/hive-table-feed.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<feed description="clicks log table " name="clicks-raw-table" xmlns="uri:falcon:feed:0.1">
- <groups>online,bi</groups>
-
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="hours(6)"/>
-
- <clusters>
- <cluster name="corp" type="source" partition="*/${cluster.colo}">
- <validity start="2021-11-01T00:00Z" end="2021-12-31T00:00Z"/>
- <retention limit="hours(48)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- <cluster name="backupCluster" type="target">
- <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
- <retention limit="hours(6)" action="archive"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <table uri="catalog:default:clicks#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/late/late-cluster.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/late/late-cluster.xml b/process/src/test/resources/config/late/late-cluster.xml
deleted file mode 100644
index ac0817f..0000000
--- a/process/src/test/resources/config/late/late-cluster.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version="1.0"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-
-<cluster colo="gs" description="" name="late-cluster" xmlns="uri:falcon:cluster:0.1"
- >
- <interfaces>
- <interface type="readonly" endpoint="hftp://localhost:50010"
- version="0.20.2"/>
- <interface type="write" endpoint="hdfs://localhost:8020"
- version="0.20.2"/>
- <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
- <interface type="workflow" endpoint="http://localhost:11000/oozie/"
- version="3.1"/>
- <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
- version="5.1.6"/>
- <interface type="registry" endpoint="Hcat" version="1"/>
- </interfaces>
- <locations>
- <location name="staging" path="/projects/falcon/staging"/>
- <location name="temp" path="/tmp"/>
- <location name="working" path="/projects/falcon/working"/>
- </locations>
- <properties>
- <property name="field1" value="value1"/>
- <property name="field2" value="value2"/>
- </properties>
-</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/late/late-feed1.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/late/late-feed1.xml b/process/src/test/resources/config/late/late-feed1.xml
deleted file mode 100644
index c500c4c..0000000
--- a/process/src/test/resources/config/late/late-feed1.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<feed description="clicks log" name="late-feed1" xmlns="uri:falcon:feed:0.1"
- >
- <partitions>
- <partition name="fraud"/>
- <partition name="country"/>
- </partitions>
-
- <groups>online,bi</groups>
-
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="minutes(5)"/>
-
- <clusters>
- <cluster name="late-cluster" type="source">
- <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
- <retention limit="hours(6)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <locations>
- <location type="data" path="/projects/falcon/clicks"/>
- <location type="stats" path="/projects/falcon/clicksStats"/>
- <location type="meta" path="/projects/falcon/clicksMetaData"/>
- </locations>
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-
- <properties>
- <property name="field1" value="value1"/>
- <property name="field2" value="value2"/>
- </properties>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/late/late-feed2.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/late/late-feed2.xml b/process/src/test/resources/config/late/late-feed2.xml
deleted file mode 100644
index 6ccffe2..0000000
--- a/process/src/test/resources/config/late/late-feed2.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<feed description="clicks log" name="late-feed2" xmlns="uri:falcon:feed:0.1"
- >
- <partitions>
- <partition name="fraud"/>
- <partition name="country"/>
- </partitions>
-
- <groups>online,bi</groups>
-
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="minutes(5)"/>
-
- <clusters>
- <cluster name="late-cluster" type="source">
- <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
- <retention limit="hours(6)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <locations>
- <location type="data" path="/projects/falcon/clicks"/>
- <location type="stats" path="/projects/falcon/clicksStats"/>
- <location type="meta" path="/projects/falcon/clicksMetaData"/>
- </locations>
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-
- <properties>
- <property name="field1" value="value1"/>
- <property name="field2" value="value2"/>
- </properties>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/late/late-feed3.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/late/late-feed3.xml b/process/src/test/resources/config/late/late-feed3.xml
deleted file mode 100644
index 239f140..0000000
--- a/process/src/test/resources/config/late/late-feed3.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<feed description="clicks log" name="late-feed3" xmlns="uri:falcon:feed:0.1"
- >
- <partitions>
- <partition name="fraud"/>
- <partition name="country"/>
- </partitions>
-
- <groups>online,bi</groups>
-
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
- <late-arrival cut-off="hours(6)"/>
-
- <clusters>
- <cluster name="late-cluster" type="source">
- <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
- <retention limit="hours(6)" action="delete"/>
- <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
- </cluster>
- </clusters>
-
- <locations>
- <location type="data" path="/projects/falcon/clicks"/>
- <location type="stats" path="/projects/falcon/clicksStats"/>
- <location type="meta" path="/projects/falcon/clicksMetaData"/>
- </locations>
-
- <ACL owner="testuser" group="group" permission="0x755"/>
- <schema location="/schema/clicks" provider="protobuf"/>
-
- <properties>
- <property name="field1" value="value1"/>
- <property name="field2" value="value2"/>
- </properties>
-</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/late/late-process1.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/late/late-process1.xml b/process/src/test/resources/config/late/late-process1.xml
deleted file mode 100644
index aba5525..0000000
--- a/process/src/test/resources/config/late/late-process1.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- ~ Licensed to the Apache Software Foundation (ASF) under one ~ or more contributor license agreements. See the NOTICE file ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file ~ to you under the Apache License, Version 2.0 (the ~ "License"); you may not use this file except in compliance ~ with the
- License. You may obtain a copy of the License at ~ ~ http://www.apache.org/licenses/LICENSE-2.0 ~ ~ Unless required by applicable law or agreed to in writing, software ~ distributed
- under the License is distributed on an "AS IS" BASIS, ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ~ See the License for the specific language governing
- permissions and ~ limitations under the License. -->
-<process name="late-process1" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="late-cluster">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <concurrency>1</concurrency>
- <execution>LIFO</execution>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="impression" feed="late-feed1" start-instance="today(0,0)" end-instance="today(0,2)"/>
- <input name="clicks" feed="late-feed2" start-instance="yesterday(0,0)" end-instance="today(0,0)"
- partition="*/US"/>
- </inputs>
-
- <outputs>
- <output name="clicksummary" feed="late-feed3" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <properties>
- <property name="procprop" value="procprop"/>
- </properties>
-
- <workflow engine="oozie" path="/user/guest/workflow"/>
-
- <retry policy="periodic" delay="hours(10)" attempts="3"/>
-
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/late/late-process2.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/late/late-process2.xml b/process/src/test/resources/config/late/late-process2.xml
deleted file mode 100644
index bc507ad..0000000
--- a/process/src/test/resources/config/late/late-process2.xml
+++ /dev/null
@@ -1,57 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<process name="late-process2" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="late-cluster">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <concurrency>1</concurrency>
- <execution>LIFO</execution>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="impression" feed="late-feed1" start-instance="today(0,0)" end-instance="today(0,2)"/>
- <input name="clicks" feed="late-feed2" start-instance="yesterday(0,0)" end-instance="today(0,0)"
- partition="*/US"/>
- </inputs>
-
- <outputs>
- <output name="clicksummary" feed="late-feed3" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <properties>
- <property name="procprop" value="procprop"/>
- </properties>
-
- <workflow engine="oozie" path="/user/guest/workflow"/>
-
- <retry policy="periodic" delay="hours(10)" attempts="3"/>
-
- <late-process policy="exp-backoff" delay="hours(1)">
- <late-input feed="impression" workflow-path="hdfs://impression/late/workflow"/>
- <late-input feed="clicks" workflow-path="hdfs://clicks/late/workflow"/>
- </late-process>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/dumb-hive-process.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/dumb-hive-process.xml b/process/src/test/resources/config/process/dumb-hive-process.xml
deleted file mode 100644
index c504074..0000000
--- a/process/src/test/resources/config/process/dumb-hive-process.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<process name="hive-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what = none -->
-
- <!-- how -->
- <workflow engine="hive" path="/apps/hive/script.hql"/>
-
- <retry policy="periodic" delay="minutes(10)" attempts="3"/>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/dumb-process.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/dumb-process.xml b/process/src/test/resources/config/process/dumb-process.xml
deleted file mode 100644
index b71f089..0000000
--- a/process/src/test/resources/config/process/dumb-process.xml
+++ /dev/null
@@ -1,40 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<process name="dumb-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what = none -->
-
- <!-- how -->
- <workflow engine="pig" path="/apps/pig/id.pig"/>
-
- <retry policy="periodic" delay="hours(10)" attempts="3"/>
-
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/hive-process-FSInputFeed.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/hive-process-FSInputFeed.xml b/process/src/test/resources/config/process/hive-process-FSInputFeed.xml
deleted file mode 100644
index d871377..0000000
--- a/process/src/test/resources/config/process/hive-process-FSInputFeed.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<process name="hive-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="input" feed="clicks" start="yesterday(0,0)" end="yesterday(20,0)"/>
- </inputs>
-
- <outputs>
- <output name="output" feed="clicks-summary-table" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <workflow engine="hive" path="/apps/hive/script.hql"/>
-
- <retry policy="periodic" delay="minutes(10)" attempts="3"/>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/hive-process-FSOutputFeed.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/hive-process-FSOutputFeed.xml b/process/src/test/resources/config/process/hive-process-FSOutputFeed.xml
deleted file mode 100644
index 23d96c3..0000000
--- a/process/src/test/resources/config/process/hive-process-FSOutputFeed.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<process name="hive-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="input" feed="clicks-raw-table" start="yesterday(0,0)" end="yesterday(20,0)"/>
- </inputs>
-
- <outputs>
- <output name="output" feed="clicks" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <workflow engine="hive" path="/apps/hive/script.hql"/>
-
- <retry policy="periodic" delay="minutes(10)" attempts="3"/>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/hive-process.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/hive-process.xml b/process/src/test/resources/config/process/hive-process.xml
deleted file mode 100644
index 4dac8e9..0000000
--- a/process/src/test/resources/config/process/hive-process.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<process name="hive-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="input" feed="clicks-raw-table" start="yesterday(0,0)" end="yesterday(20,0)"/>
- </inputs>
-
- <outputs>
- <output name="output" feed="clicks-summary-table" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <workflow engine="hive" path="/apps/hive/script.hql"/>
-
- <retry policy="periodic" delay="minutes(10)" attempts="3"/>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/pig-process-0.1.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/pig-process-0.1.xml b/process/src/test/resources/config/process/pig-process-0.1.xml
deleted file mode 100644
index 318f0da..0000000
--- a/process/src/test/resources/config/process/pig-process-0.1.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<process name="pig-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="impression" feed="impressions" start="today(0,0)" end="today(0,2)"/>
- <input name="click" feed="clicks" start="yesterday(0,0)" end="latest(0)" partition="*/US"/>
- </inputs>
-
- <outputs>
- <output name="clicksummary" feed="impressions" instance="today(0,0)"/>
- </outputs>
-
- <properties>
- <property name="procprop" value="procprop"/>
- <property name="mapred.job.priority" value="LOW"/>
- </properties>
-
- <!-- how -->
- <workflow engine="pig" path="/apps/pig/id.pig"/>
-
- <retry policy="periodic" delay="hours(10)" attempts="3"/>
-
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/pig-process-table.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/pig-process-table.xml b/process/src/test/resources/config/process/pig-process-table.xml
deleted file mode 100644
index 37aca10..0000000
--- a/process/src/test/resources/config/process/pig-process-table.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<process name="table-process" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="input" feed="clicks-raw-table" start="yesterday(0,0)" end="yesterday(20,0)"/>
- </inputs>
-
- <outputs>
- <output name="output" feed="clicks-summary-table" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <workflow engine="pig" path="/apps/pig/id.pig"/>
-
- <retry policy="periodic" delay="minutes(10)" attempts="3"/>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/resources/config/process/process-0.1.xml
----------------------------------------------------------------------
diff --git a/process/src/test/resources/config/process/process-0.1.xml b/process/src/test/resources/config/process/process-0.1.xml
deleted file mode 100644
index 6148441..0000000
--- a/process/src/test/resources/config/process/process-0.1.xml
+++ /dev/null
@@ -1,45 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- ~ Licensed to the Apache Software Foundation (ASF) under one ~ or more contributor license agreements. See the NOTICE file ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file ~ to you under the Apache License, Version 2.0 (the ~ "License"); you may not use this file except in compliance ~ with the
- License. You may obtain a copy of the License at ~ ~ http://www.apache.org/licenses/LICENSE-2.0 ~ ~ Unless required by applicable law or agreed to in writing, software ~ distributed
- under the License is distributed on an "AS IS" BASIS, ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ~ See the License for the specific language governing
- permissions and ~ limitations under the License. -->
-<process name="sample" xmlns="uri:falcon:process:0.1">
- <!-- where -->
- <clusters>
- <cluster name="corp">
- <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
- </cluster>
- </clusters>
-
- <!-- when -->
- <parallel>1</parallel>
- <order>LIFO</order>
- <frequency>hours(1)</frequency>
- <timezone>UTC</timezone>
-
- <!-- what -->
- <inputs>
- <input name="impression" feed="impressions" start="today(0,0)" end="today(0,2)"/>
- <input name="click" feed="clicks" start="yesterday(0,0)" end="latest(0)" partition="*/US"/>
- </inputs>
-
- <outputs>
- <output name="clicksummary" feed="impressions" instance="today(0,0)"/>
- </outputs>
-
- <!-- how -->
- <properties>
- <property name="procprop" value="procprop"/>
- <property name="mapred.job.priority" value="LOW"/>
- </properties>
-
- <workflow name="test" version="1.0.0" engine="oozie" path="/user/guest/workflow"/>
-
- <retry policy="periodic" delay="hours(10)" attempts="3"/>
-
- <late-process policy="exp-backoff" delay="hours(1)">
- <late-input input="impression" workflow-path="hdfs://impression/late/workflow"/>
- <late-input input="click" workflow-path="hdfs://clicks/late/workflow"/>
- </late-process>
-</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/app/hive/wordcount.hql
----------------------------------------------------------------------
diff --git a/src/main/examples/app/hive/wordcount.hql b/src/main/examples/app/hive/wordcount.hql
index 62d4f32..4c101c5 100644
--- a/src/main/examples/app/hive/wordcount.hql
+++ b/src/main/examples/app/hive/wordcount.hql
@@ -16,4 +16,4 @@
-- limitations under the License.
--
-FROM ${falcon_input_table} INSERT OVERWRITE TABLE ${falcon_output_table} PARTITION(${falcon_output_partitions}) SELECT word, SUM(cnt) as cnt WHERE ${falcon_input_filter} GROUP BY word;
+FROM ${falcon_inparts_table} INSERT OVERWRITE TABLE ${falcon_outpart_table} PARTITION(${falcon_outpart_partitions}) SELECT word, SUM(cnt) as cnt WHERE ${falcon_inparts_filter} GROUP BY word;
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/app/pig/hcat-wordcount.pig
----------------------------------------------------------------------
diff --git a/src/main/examples/app/pig/hcat-wordcount.pig b/src/main/examples/app/pig/hcat-wordcount.pig
index 023ce3d..3df93c0 100644
--- a/src/main/examples/app/pig/hcat-wordcount.pig
+++ b/src/main/examples/app/pig/hcat-wordcount.pig
@@ -16,8 +16,8 @@
* limitations under the License.
*/
-indata = LOAD '$falcon_input_table' USING org.apache.hive.hcatalog.pig.HCatLoader();
-filterdata = FILTER indata BY $falcon_input_filter;
+indata = LOAD '$falcon_inparts_table' USING org.apache.hive.hcatalog.pig.HCatLoader();
+filterdata = FILTER indata BY $falcon_inparts_filter;
grpdata = GROUP filterdata BY (word);
finaldata = FOREACH grpdata GENERATE FLATTEN(group) as word, (int)SUM(filterdata.cnt) as cnt;
-STORE finaldata INTO '$falcon_output_table' USING org.apache.hive.hcatalog.pig.HCatStorer('$falcon_output_dataout_partitions');
+STORE finaldata INTO '$falcon_outpart_table' USING org.apache.hive.hcatalog.pig.HCatStorer('$falcon_outpart_partitions');
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/data/hcat-generate.sh
----------------------------------------------------------------------
diff --git a/src/main/examples/data/hcat-generate.sh b/src/main/examples/data/hcat-generate.sh
index 957710a..5c4b40a 100644
--- a/src/main/examples/data/hcat-generate.sh
+++ b/src/main/examples/data/hcat-generate.sh
@@ -34,8 +34,10 @@ BASEDIR=`cd ${BASEDIR};pwd`
${BASEDIR}/generate.sh
hcat -e "DROP TABLE IF EXISTS in_table"
+hcat -e "DROP TABLE IF EXISTS repl_in_table"
hcat -e "DROP TABLE IF EXISTS out_table"
hcat -e "CREATE TABLE in_table (word STRING, cnt INT) PARTITIONED BY (ds STRING);"
+hcat -e "CREATE TABLE repl_in_table (word STRING, cnt INT) PARTITIONED BY (ds STRING);"
hcat -e "CREATE TABLE out_table (word STRING, cnt INT) PARTITIONED BY (ds STRING);"
for MINUTE in `seq -w 00 59`
do
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/src/main/examples/entity/filesystem/pig-process.xml
----------------------------------------------------------------------
diff --git a/src/main/examples/entity/filesystem/pig-process.xml b/src/main/examples/entity/filesystem/pig-process.xml
index 0dbb558..99e03bd 100644
--- a/src/main/examples/entity/filesystem/pig-process.xml
+++ b/src/main/examples/entity/filesystem/pig-process.xml
@@ -31,12 +31,12 @@
<inputs>
<!-- In the pig script, the input paths will be available in a variable 'inpaths' -->
- <input name="input" feed="in" start="now(0,-5)" end="now(0,-1)"/>
+ <input name="inpaths" feed="in" start="now(0,-5)" end="now(0,-1)"/>
</inputs>
<outputs>
<!-- In the pig script, the output path will be available in a variable 'outpath' -->
- <output name="output" feed="out" instance="now(0,0)"/>
+ <output name="outpath" feed="out" instance="now(0,0)"/>
</outputs>
<workflow engine="pig" path="/app/pig/wordcount.pig"/>
[3/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/src-cluster.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/src-cluster.xml b/oozie/src/test/resources/feed/src-cluster.xml
new file mode 100644
index 0000000..730f8d2
--- /dev/null
+++ b/oozie/src/test/resources/feed/src-cluster.xml
@@ -0,0 +1,40 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<cluster colo="gs1" description="" name="corp1" xmlns="uri:falcon:cluster:0.1"
+ >
+ <interfaces>
+ <interface type="readonly" endpoint="http://localhost:50070"
+ version="0.20.2"/>
+ <interface type="write" endpoint="hdfs://localhost:8020"
+ version="0.20.2"/>
+ <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/"
+ version="3.1"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
+ version="5.1.6"/>
+ <interface type="registry" endpoint="thrift://localhost:49093" version="1"/>
+ </interfaces>
+ <locations>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/falcon/working"/>
+ <location name="staging" path="/projects/falcon/staging"/>
+ </locations>
+ <properties>
+ <property name="separator" value="-"/>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/table-replication-feed.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/table-replication-feed.xml b/oozie/src/test/resources/feed/table-replication-feed.xml
new file mode 100644
index 0000000..4c610f6
--- /dev/null
+++ b/oozie/src/test/resources/feed/table-replication-feed.xml
@@ -0,0 +1,42 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log" name="raw-logs-table" xmlns="uri:falcon:feed:0.1">
+
+ <frequency>minutes(20)</frequency>
+ <timezone>UTC</timezone>
+
+ <clusters>
+ <cluster name="corp1" type="source" delay="minutes(40)">
+ <validity start="2010-01-01T00:00Z" end="2010-01-01T02:00Z"/>
+ <retention limit="minutes(5)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ <cluster name="corp2" type="target">
+ <validity start="2010-01-01T00:00Z" end="2010-01-01T02:00Z"/>
+ <retention limit="minutes(7)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ <table uri="catalog:target_db:target_clicks_table#ds=${YEAR}${MONTH}${DAY};region=${region}" />
+ </cluster>
+ </clusters>
+
+ <table uri="catalog:source_db:source_clicks_table#ds=${YEAR}${MONTH}${DAY};region=${region}" />
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/trg-cluster-alpha.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/trg-cluster-alpha.xml b/oozie/src/test/resources/feed/trg-cluster-alpha.xml
new file mode 100644
index 0000000..1fb07cb
--- /dev/null
+++ b/oozie/src/test/resources/feed/trg-cluster-alpha.xml
@@ -0,0 +1,39 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<cluster colo="ua1" description="" name="alpha" xmlns="uri:falcon:cluster:0.1">
+ <interfaces>
+ <interface type="readonly" endpoint="http://localhost:50070"
+ version="0.20.2"/>
+ <interface type="write" endpoint="hdfs://localhost:8020"
+ version="0.20.2"/>
+ <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/"
+ version="3.1"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
+ version="5.1.6"/>
+ <interface type="registry" endpoint="thrift://localhost:59093" version="1"/>
+ </interfaces>
+ <locations>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/falcon/working"/>
+ <location name="staging" path="/projects/falcon/staging2"/>
+ </locations>
+ <properties>
+ <property name="separator" value="-"/>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/trg-cluster-beta.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/trg-cluster-beta.xml b/oozie/src/test/resources/feed/trg-cluster-beta.xml
new file mode 100644
index 0000000..0bf0bcd
--- /dev/null
+++ b/oozie/src/test/resources/feed/trg-cluster-beta.xml
@@ -0,0 +1,39 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<cluster colo="ua2" description="" name="beta" xmlns="uri:falcon:cluster:0.1">
+ <interfaces>
+ <interface type="readonly" endpoint="http://localhost:50070"
+ version="0.20.2"/>
+ <interface type="write" endpoint="hdfs://localhost:8020"
+ version="0.20.2"/>
+ <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/"
+ version="3.1"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
+ version="5.1.6"/>
+ <interface type="registry" endpoint="thrift://localhost:59093" version="1"/>
+ </interfaces>
+ <locations>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/falcon/working"/>
+ <location name="staging" path="/projects/falcon/staging2"/>
+ </locations>
+ <properties>
+ <property name="separator" value="-"/>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/trg-cluster.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/trg-cluster.xml b/oozie/src/test/resources/feed/trg-cluster.xml
new file mode 100644
index 0000000..8260fda
--- /dev/null
+++ b/oozie/src/test/resources/feed/trg-cluster.xml
@@ -0,0 +1,40 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<cluster colo="gs2" description="" name="corp2" xmlns="uri:falcon:cluster:0.1"
+ >
+ <interfaces>
+ <interface type="readonly" endpoint="http://localhost:50070"
+ version="0.20.2"/>
+ <interface type="write" endpoint="hdfs://localhost:8020"
+ version="0.20.2"/>
+ <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/"
+ version="3.1"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
+ version="5.1.6"/>
+ <interface type="registry" endpoint="thrift://localhost:59093" version="1"/>
+ </interfaces>
+ <locations>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/falcon/working"/>
+ <location name="staging" path="/projects/falcon/staging2"/>
+ </locations>
+ <properties>
+ <property name="separator" value="-"/>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7130352..25c498e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -425,9 +425,7 @@
<module>test-util</module>
<module>hadoop-dependencies</module>
<module>hadoop-webapp</module>
- <module>feed</module>
<module>messaging</module>
- <module>process</module>
<module>oozie-el-extensions</module>
<module>oozie</module>
<module>acquisition</module>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/prism/src/main/java/org/apache/falcon/util/EmbeddedServer.java
----------------------------------------------------------------------
diff --git a/prism/src/main/java/org/apache/falcon/util/EmbeddedServer.java b/prism/src/main/java/org/apache/falcon/util/EmbeddedServer.java
index a9c9643..b3a5524 100644
--- a/prism/src/main/java/org/apache/falcon/util/EmbeddedServer.java
+++ b/prism/src/main/java/org/apache/falcon/util/EmbeddedServer.java
@@ -48,6 +48,7 @@ public class EmbeddedServer {
public void start() throws Exception {
Services.get().reset();
server.start();
+ server.join();
}
public void stop() throws Exception {
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/pom.xml
----------------------------------------------------------------------
diff --git a/process/pom.xml b/process/pom.xml
deleted file mode 100644
index c1ee74d..0000000
--- a/process/pom.xml
+++ /dev/null
@@ -1,118 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-main</artifactId>
- <version>0.6-incubating-SNAPSHOT</version>
- </parent>
- <artifactId>falcon-process</artifactId>
- <description>Apache Falcon Process Module</description>
- <name>Apache Falcon Process</name>
- <packaging>jar</packaging>
-
- <profiles>
- <profile>
- <id>hadoop-1</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>hadoop-2</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- <classifier>tests</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <classifier>tests</classifier>
- </dependency>
- </dependencies>
- </profile>
- </profiles>
-
- <dependencies>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-oozie-adaptor</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-common</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-feed</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-test-util</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-messaging</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.testng</groupId>
- <artifactId>testng</artifactId>
- </dependency>
-
- <dependency>
- <groupId>xerces</groupId>
- <artifactId>xercesImpl</artifactId>
- </dependency>
- </dependencies>
-</project>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/main/java/org/apache/falcon/workflow/OozieProcessWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/process/src/main/java/org/apache/falcon/workflow/OozieProcessWorkflowBuilder.java b/process/src/main/java/org/apache/falcon/workflow/OozieProcessWorkflowBuilder.java
deleted file mode 100644
index 3751f95..0000000
--- a/process/src/main/java/org/apache/falcon/workflow/OozieProcessWorkflowBuilder.java
+++ /dev/null
@@ -1,904 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.falcon.workflow;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.falcon.FalconException;
-import org.apache.falcon.Tag;
-import org.apache.falcon.entity.CatalogStorage;
-import org.apache.falcon.entity.ClusterHelper;
-import org.apache.falcon.entity.EntityUtil;
-import org.apache.falcon.entity.FeedHelper;
-import org.apache.falcon.entity.ProcessHelper;
-import org.apache.falcon.entity.Storage;
-import org.apache.falcon.entity.v0.EntityType;
-import org.apache.falcon.entity.v0.Frequency;
-import org.apache.falcon.entity.v0.Frequency.TimeUnit;
-import org.apache.falcon.entity.v0.SchemaHelper;
-import org.apache.falcon.entity.v0.cluster.Cluster;
-import org.apache.falcon.entity.v0.feed.Feed;
-import org.apache.falcon.entity.v0.feed.LocationType;
-import org.apache.falcon.entity.v0.process.EngineType;
-import org.apache.falcon.entity.v0.process.Input;
-import org.apache.falcon.entity.v0.process.Output;
-import org.apache.falcon.entity.v0.process.Process;
-import org.apache.falcon.entity.v0.process.Property;
-import org.apache.falcon.entity.v0.process.Workflow;
-import org.apache.falcon.expression.ExpressionHelper;
-import org.apache.falcon.hadoop.HadoopClientFactory;
-import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
-import org.apache.falcon.oozie.coordinator.CONTROLS;
-import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
-import org.apache.falcon.oozie.coordinator.DATAIN;
-import org.apache.falcon.oozie.coordinator.DATAOUT;
-import org.apache.falcon.oozie.coordinator.DATASETS;
-import org.apache.falcon.oozie.coordinator.INPUTEVENTS;
-import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
-import org.apache.falcon.oozie.coordinator.SYNCDATASET;
-import org.apache.falcon.oozie.coordinator.WORKFLOW;
-import org.apache.falcon.oozie.workflow.ACTION;
-import org.apache.falcon.oozie.workflow.DELETE;
-import org.apache.falcon.oozie.workflow.PIG;
-import org.apache.falcon.oozie.workflow.PREPARE;
-import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
-import org.apache.falcon.security.CurrentUser;
-import org.apache.falcon.update.UpdateHelper;
-import org.apache.falcon.util.OozieUtils;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.oozie.client.CoordinatorJob.Timeunit;
-import org.apache.oozie.client.OozieClient;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.xml.bind.JAXBElement;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
-/**
- * Oozie workflow builder for falcon entities.
- */
-public class OozieProcessWorkflowBuilder extends OozieWorkflowBuilder<Process> {
- private static final Logger LOG = LoggerFactory.getLogger(OozieProcessWorkflowBuilder.class);
-
- private static final Set<String> FALCON_PROCESS_HIVE_ACTIONS = new HashSet<String>(
- Arrays.asList(new String[]{"recordsize", "user-oozie-workflow", "user-pig-job", "user-hive-job", }));
-
- public OozieProcessWorkflowBuilder(Process entity) {
- super(entity);
- }
-
- @Override
- public Map<String, Properties> newWorkflowSchedule(String... clusters) throws FalconException {
- Map<String, Properties> propertiesMap = new HashMap<String, Properties>();
-
- for (String clusterName : clusters) {
- org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(entity, clusterName);
- if (processCluster.getValidity().getStart().compareTo(processCluster.getValidity().getEnd()) >= 0) {
- LOG.info("process validity start <= end for cluster {}. Skipping schedule", clusterName);
- break;
- }
-
- Cluster cluster = CONFIG_STORE.get(EntityType.CLUSTER, processCluster.getName());
- Path bundlePath = EntityUtil.getNewStagingPath(cluster, entity);
- map(cluster, bundlePath);
- Properties properties = createAppProperties(clusterName, bundlePath, CurrentUser.getUser());
-
- //Add libpath
- String libPath = entity.getWorkflow().getLib();
- if (!StringUtils.isEmpty(libPath)) {
- String path = libPath.replace("${nameNode}", "");
- properties.put(OozieClient.LIBPATH, "${nameNode}" + path);
- }
-
- if (entity.getInputs() != null) {
- for (Input in : entity.getInputs().getInputs()) {
- if (in.isOptional()) {
- addOptionalInputProperties(properties, in, clusterName);
- }
- }
- }
- propertiesMap.put(clusterName, properties);
- }
- return propertiesMap;
- }
-
- private void addOptionalInputProperties(Properties properties, Input in, String clusterName)
- throws FalconException {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, in.getFeed());
- org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, clusterName);
- String inName = in.getName();
- properties.put(inName + ".frequency", String.valueOf(feed.getFrequency().getFrequency()));
- properties.put(inName + ".freq_timeunit", mapToCoordTimeUnit(feed.getFrequency().getTimeUnit()).name());
- properties.put(inName + ".timezone", feed.getTimezone().getID());
- properties.put(inName + ".end_of_duration", Timeunit.NONE.name());
- properties.put(inName + ".initial-instance", SchemaHelper.formatDateUTC(cluster.getValidity().getStart()));
- properties.put(inName + ".done-flag", "notused");
-
- String locPath = FeedHelper.createStorage(clusterName, feed)
- .getUriTemplate(LocationType.DATA).replace('$', '%');
- properties.put(inName + ".uri-template", locPath);
-
- properties.put(inName + ".start-instance", in.getStart());
- properties.put(inName + ".end-instance", in.getEnd());
- }
-
- private Timeunit mapToCoordTimeUnit(TimeUnit tu) {
- switch (tu) {
- case days:
- return Timeunit.DAY;
-
- case hours:
- return Timeunit.HOUR;
-
- case minutes:
- return Timeunit.MINUTE;
-
- case months:
- return Timeunit.MONTH;
-
- default:
- throw new IllegalArgumentException("Unhandled time unit " + tu);
- }
- }
-
- @Override
- public Date getNextStartTime(Process process, String cluster, Date now) throws FalconException {
- org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(process, cluster);
- return EntityUtil.getNextStartTime(processCluster.getValidity().getStart(),
- process.getFrequency(), process.getTimezone(), now);
- }
-
- @Override
- public String[] getWorkflowNames() {
- return new String[]{EntityUtil.getWorkflowName(Tag.DEFAULT, entity).toString()};
- }
-
- private static final String DEFAULT_WF_TEMPLATE = "/config/workflow/process-parent-workflow.xml";
- private static final int THIRTY_MINUTES = 30 * 60 * 1000;
-
- @Override
- public List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException {
- try {
- FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
-
- //Copy user workflow and lib to staging dir
- Map<String, String> checksums = UpdateHelper.checksumAndCopy(fs, new Path(entity.getWorkflow().getPath()),
- new Path(bundlePath, EntityUtil.PROCESS_USER_DIR));
- if (entity.getWorkflow().getLib() != null && fs.exists(new Path(entity.getWorkflow().getLib()))) {
- checksums.putAll(UpdateHelper.checksumAndCopy(fs, new Path(entity.getWorkflow().getLib()),
- new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR)));
- }
-
- writeChecksums(fs, new Path(bundlePath, EntityUtil.PROCESS_CHECKSUM_FILE), checksums);
- } catch (IOException e) {
- throw new FalconException("Failed to copy user workflow/lib", e);
- }
-
- List<COORDINATORAPP> apps = new ArrayList<COORDINATORAPP>();
- apps.add(createDefaultCoordinator(cluster, bundlePath));
-
- return apps;
- }
-
- private void writeChecksums(FileSystem fs, Path path, Map<String, String> checksums) throws FalconException {
- try {
- FSDataOutputStream stream = fs.create(path);
- try {
- for (Map.Entry<String, String> entry : checksums.entrySet()) {
- stream.write((entry.getKey() + "=" + entry.getValue() + "\n").getBytes());
- }
- } finally {
- stream.close();
- }
- } catch (IOException e) {
- throw new FalconException("Failed to copy user workflow/lib", e);
- }
- }
-
- private Path getUserWorkflowPath(Cluster cluster, Path bundlePath) throws FalconException {
- try {
- FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
- Path wfPath = new Path(entity.getWorkflow().getPath());
- if (fs.isFile(wfPath)) {
- return new Path(bundlePath, EntityUtil.PROCESS_USER_DIR + "/" + wfPath.getName());
- } else {
- return new Path(bundlePath, EntityUtil.PROCESS_USER_DIR);
- }
- } catch(IOException e) {
- throw new FalconException("Failed to get workflow path", e);
- }
- }
-
- private Path getUserLibPath(Cluster cluster, Path bundlePath) throws FalconException {
- try {
- if (entity.getWorkflow().getLib() == null) {
- return null;
- }
- Path libPath = new Path(entity.getWorkflow().getLib());
-
- FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
- if (fs.isFile(libPath)) {
- return new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR + "/" + libPath.getName());
- } else {
- return new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR);
- }
- } catch(IOException e) {
- throw new FalconException("Failed to get user lib path", e);
- }
- }
-
- /**
- * Creates default oozie coordinator.
- *
- * @param cluster - Cluster for which the coordinator app need to be created
- * @param bundlePath - bundle path
- * @return COORDINATORAPP
- * @throws FalconException on Error
- */
- public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath) throws FalconException {
- if (entity == null) {
- return null;
- }
-
- COORDINATORAPP coord = new COORDINATORAPP();
- String coordName = EntityUtil.getWorkflowName(Tag.DEFAULT, entity).toString();
- Path coordPath = getCoordPath(bundlePath, coordName);
-
- // coord attributes
- initializeCoordAttributes(cluster, entity, coord, coordName);
-
- CONTROLS controls = initializeControls(entity); // controls
- coord.setControls(controls);
-
- // Configuration
- Map<String, String> props = createCoordDefaultConfiguration(cluster, coordName);
-
- initializeInputPaths(cluster, entity, coord, props); // inputs
- initializeOutputPaths(cluster, entity, coord, props); // outputs
-
- Workflow processWorkflow = entity.getWorkflow();
- propagateUserWorkflowProperties(processWorkflow, props, entity.getName());
-
- // create parent wf
- createWorkflow(cluster, entity, processWorkflow, coordName, coordPath);
-
- WORKFLOW wf = new WORKFLOW();
- wf.setAppPath(getStoragePath(coordPath.toString()));
- wf.setConfiguration(getCoordConfig(props));
-
- // set coord action to parent wf
- org.apache.falcon.oozie.coordinator.ACTION action = new org.apache.falcon.oozie.coordinator.ACTION();
- action.setWorkflow(wf);
- coord.setAction(action);
-
- return coord;
- }
-
- private void initializeCoordAttributes(Cluster cluster, Process process, COORDINATORAPP coord, String coordName) {
- coord.setName(coordName);
- org.apache.falcon.entity.v0.process.Cluster processCluster =
- ProcessHelper.getCluster(process, cluster.getName());
- coord.setStart(SchemaHelper.formatDateUTC(processCluster.getValidity().getStart()));
- coord.setEnd(SchemaHelper.formatDateUTC(processCluster.getValidity().getEnd()));
- coord.setTimezone(process.getTimezone().getID());
- coord.setFrequency("${coord:" + process.getFrequency().toString() + "}");
- }
-
- private CONTROLS initializeControls(Process process)
- throws FalconException {
- CONTROLS controls = new CONTROLS();
- controls.setConcurrency(String.valueOf(process.getParallel()));
- controls.setExecution(process.getOrder().name());
-
- Frequency timeout = process.getTimeout();
- long frequencyInMillis = ExpressionHelper.get().evaluate(process.getFrequency().toString(), Long.class);
- long timeoutInMillis;
- if (timeout != null) {
- timeoutInMillis = ExpressionHelper.get().
- evaluate(process.getTimeout().toString(), Long.class);
- } else {
- timeoutInMillis = frequencyInMillis * 6;
- if (timeoutInMillis < THIRTY_MINUTES) {
- timeoutInMillis = THIRTY_MINUTES;
- }
- }
- controls.setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));
-
- if (timeoutInMillis / frequencyInMillis * 2 > 0) {
- controls.setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
- }
-
- return controls;
- }
-
- private void initializeInputPaths(Cluster cluster, Process process, COORDINATORAPP coord,
- Map<String, String> props) throws FalconException {
- if (process.getInputs() == null) {
- props.put("falconInputFeeds", "NONE");
- props.put("falconInPaths", IGNORE);
- return;
- }
-
- List<String> inputFeeds = new ArrayList<String>();
- List<String> inputPaths = new ArrayList<String>();
- List<String> inputFeedStorageTypes = new ArrayList<String>();
- for (Input input : process.getInputs().getInputs()) {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
- Storage storage = FeedHelper.createStorage(cluster, feed);
-
- if (!input.isOptional()) {
- if (coord.getDatasets() == null) {
- coord.setDatasets(new DATASETS());
- }
- if (coord.getInputEvents() == null) {
- coord.setInputEvents(new INPUTEVENTS());
- }
-
- SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, input.getName(), LocationType.DATA);
- coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);
-
- DATAIN datain = createDataIn(input);
- coord.getInputEvents().getDataIn().add(datain);
- }
-
- String inputExpr = null;
- if (storage.getType() == Storage.TYPE.FILESYSTEM) {
- inputExpr = getELExpression("dataIn('" + input.getName() + "', '" + input.getPartition() + "')");
- props.put(input.getName(), inputExpr);
- } else if (storage.getType() == Storage.TYPE.TABLE) {
- inputExpr = "${coord:dataIn('" + input.getName() + "')}";
- propagateCatalogTableProperties(input, (CatalogStorage) storage, props);
- }
-
- inputFeeds.add(feed.getName());
- inputPaths.add(inputExpr);
- inputFeedStorageTypes.add(storage.getType().name());
- }
-
- propagateLateDataProperties(inputFeeds, inputPaths, inputFeedStorageTypes, props);
- }
-
- private void propagateLateDataProperties(List<String> inputFeeds, List<String> inputPaths,
- List<String> inputFeedStorageTypes, Map<String, String> props) {
- // populate late data handler - should-record action
- props.put("falconInputFeeds", join(inputFeeds.iterator(), '#'));
- props.put("falconInPaths", join(inputPaths.iterator(), '#'));
-
- // storage type for each corresponding feed sent as a param to LateDataHandler
- // needed to compute usage based on storage type in LateDataHandler
- props.put("falconInputFeedStorageTypes", join(inputFeedStorageTypes.iterator(), '#'));
- }
-
- private void initializeOutputPaths(Cluster cluster, Process process, COORDINATORAPP coord,
- Map<String, String> props) throws FalconException {
- if (process.getOutputs() == null) {
- props.put(ARG.feedNames.getPropName(), "NONE");
- props.put(ARG.feedInstancePaths.getPropName(), IGNORE);
- return;
- }
-
- if (coord.getDatasets() == null) {
- coord.setDatasets(new DATASETS());
- }
-
- if (coord.getOutputEvents() == null) {
- coord.setOutputEvents(new OUTPUTEVENTS());
- }
-
- List<String> outputFeeds = new ArrayList<String>();
- List<String> outputPaths = new ArrayList<String>();
- for (Output output : process.getOutputs().getOutputs()) {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
- Storage storage = FeedHelper.createStorage(cluster, feed);
-
- SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, output.getName(), LocationType.DATA);
- coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);
-
- DATAOUT dataout = createDataOut(output);
- coord.getOutputEvents().getDataOut().add(dataout);
-
- String outputExpr = "${coord:dataOut('" + output.getName() + "')}";
- outputFeeds.add(feed.getName());
- outputPaths.add(outputExpr);
-
- if (storage.getType() == Storage.TYPE.FILESYSTEM) {
- props.put(output.getName(), outputExpr);
-
- propagateFileSystemProperties(output, feed, cluster, coord, storage, props);
- } else if (storage.getType() == Storage.TYPE.TABLE) {
- propagateCatalogTableProperties(output, (CatalogStorage) storage, props);
- }
- }
-
- // Output feed name and path for parent workflow
- props.put(ARG.feedNames.getPropName(), join(outputFeeds.iterator(), ','));
- props.put(ARG.feedInstancePaths.getPropName(), join(outputPaths.iterator(), ','));
- }
-
- private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage,
- String datasetName, LocationType locationType) throws FalconException {
-
- SYNCDATASET syncdataset = new SYNCDATASET();
- syncdataset.setName(datasetName);
- syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");
-
- String uriTemplate = storage.getUriTemplate(locationType);
- if (storage.getType() == Storage.TYPE.TABLE) {
- uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
- }
- syncdataset.setUriTemplate(uriTemplate);
-
- org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
- syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()));
- syncdataset.setTimezone(feed.getTimezone().getID());
-
- if (feed.getAvailabilityFlag() == null) {
- syncdataset.setDoneFlag("");
- } else {
- syncdataset.setDoneFlag(feed.getAvailabilityFlag());
- }
-
- return syncdataset;
- }
-
- private DATAOUT createDataOut(Output output) {
- DATAOUT dataout = new DATAOUT();
- dataout.setName(output.getName());
- dataout.setDataset(output.getName());
- dataout.setInstance(getELExpression(output.getInstance()));
- return dataout;
- }
-
- private DATAIN createDataIn(Input input) {
- DATAIN datain = new DATAIN();
- datain.setName(input.getName());
- datain.setDataset(input.getName());
- datain.setStartInstance(getELExpression(input.getStart()));
- datain.setEndInstance(getELExpression(input.getEnd()));
- return datain;
- }
-
- private void propagateFileSystemProperties(Output output, Feed feed, Cluster cluster, COORDINATORAPP coord,
- Storage storage, Map<String, String> props)
- throws FalconException {
-
- // stats and meta paths
- createOutputEvent(output, feed, cluster, LocationType.STATS, coord, props, storage);
- createOutputEvent(output, feed, cluster, LocationType.META, coord, props, storage);
- createOutputEvent(output, feed, cluster, LocationType.TMP, coord, props, storage);
- }
-
- //SUSPEND CHECKSTYLE CHECK ParameterNumberCheck
- private void createOutputEvent(Output output, Feed feed, Cluster cluster, LocationType locType,
- COORDINATORAPP coord, Map<String, String> props, Storage storage)
- throws FalconException {
-
- String name = output.getName();
- String type = locType.name().toLowerCase();
-
- SYNCDATASET dataset = createDataSet(feed, cluster, storage, name + type, locType);
- coord.getDatasets().getDatasetOrAsyncDataset().add(dataset);
-
- DATAOUT dataout = new DATAOUT();
- dataout.setName(name + type);
- dataout.setDataset(name + type);
- dataout.setInstance(getELExpression(output.getInstance()));
-
- OUTPUTEVENTS outputEvents = coord.getOutputEvents();
- if (outputEvents == null) {
- outputEvents = new OUTPUTEVENTS();
- coord.setOutputEvents(outputEvents);
- }
- outputEvents.getDataOut().add(dataout);
-
- String outputExpr = "${coord:dataOut('" + name + type + "')}";
- props.put(name + "." + type, outputExpr);
- }
- //RESUME CHECKSTYLE CHECK ParameterNumberCheck
-
- private void propagateCommonCatalogTableProperties(CatalogStorage tableStorage,
- Map<String, String> props, String prefix) {
- props.put(prefix + "_storage_type", tableStorage.getType().name());
- props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
- props.put(prefix + "_database", tableStorage.getDatabase());
- props.put(prefix + "_table", tableStorage.getTable());
- }
-
- private void propagateCatalogTableProperties(Input input, CatalogStorage tableStorage,
- Map<String, String> props) {
- String prefix = "falcon_" + input.getName();
-
- propagateCommonCatalogTableProperties(tableStorage, props, prefix);
-
- props.put(prefix + "_partition_filter_pig",
- "${coord:dataInPartitionFilter('" + input.getName() + "', 'pig')}");
- props.put(prefix + "_partition_filter_hive",
- "${coord:dataInPartitionFilter('" + input.getName() + "', 'hive')}");
- props.put(prefix + "_partition_filter_java",
- "${coord:dataInPartitionFilter('" + input.getName() + "', 'java')}");
- props.put(prefix + "_datain_partitions_hive",
- "${coord:dataInPartitions('" + input.getName() + "', 'hive-export')}");
- }
-
- private void propagateCatalogTableProperties(Output output, CatalogStorage tableStorage,
- Map<String, String> props) {
- String prefix = "falcon_" + output.getName();
-
- propagateCommonCatalogTableProperties(tableStorage, props, prefix);
-
- props.put(prefix + "_dataout_partitions",
- "${coord:dataOutPartitions('" + output.getName() + "')}");
- props.put(prefix + "_dated_partition_value", "${coord:dataOutPartitionValue('"
- + output.getName() + "', '" + tableStorage.getDatedPartitionKey() + "')}");
- }
-
- private String join(Iterator<String> itr, char sep) {
- String joinedStr = StringUtils.join(itr, sep);
- if (joinedStr.isEmpty()) {
- joinedStr = "null";
- }
- return joinedStr;
- }
-
- private String getELExpression(String expr) {
- if (expr != null) {
- expr = "${" + expr + "}";
- }
- return expr;
- }
-
- @Override
- protected Map<String, String> getEntityProperties() {
- Map<String, String> props = new HashMap<String, String>();
- if (entity.getProperties() != null) {
- for (Property prop : entity.getProperties().getProperties()) {
- props.put(prop.getName(), prop.getValue());
- }
- }
- return props;
- }
-
- private void propagateUserWorkflowProperties(Workflow processWorkflow,
- Map<String, String> props, String processName) {
- props.put("userWorkflowName", ProcessHelper.getProcessWorkflowName(
- processWorkflow.getName(), processName));
- props.put("userWorkflowVersion", processWorkflow.getVersion());
- props.put("userWorkflowEngine", processWorkflow.getEngine().value());
- }
-
- protected void createWorkflow(Cluster cluster, Process process, Workflow processWorkflow,
- String wfName, Path parentWfPath) throws FalconException {
- WORKFLOWAPP wfApp = getWorkflowTemplate(DEFAULT_WF_TEMPLATE);
- wfApp.setName(wfName);
- try {
- addLibExtensionsToWorkflow(cluster, wfApp, EntityType.PROCESS, null);
- } catch (IOException e) {
- throw new FalconException("Failed to add library extensions for the workflow", e);
- }
-
- final boolean shouldConfigureHive = shouldSetupHiveConfiguration(cluster, process);
- if (shouldConfigureHive) {
- setupHiveCredentials(cluster, parentWfPath, wfApp);
- }
-
- String userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent()).toString();
- EngineType engineType = processWorkflow.getEngine();
- for (Object object : wfApp.getDecisionOrForkOrJoin()) {
- if (!(object instanceof ACTION)) {
- continue;
- }
-
- ACTION action = (ACTION) object;
- String actionName = action.getName();
- if (engineType == EngineType.OOZIE && actionName.equals("user-oozie-workflow")) {
- action.getSubWorkflow().setAppPath("${nameNode}" + userWfPath);
- } else if (engineType == EngineType.PIG && actionName.equals("user-pig-job")) {
- decoratePIGAction(cluster, process, action.getPig(), parentWfPath, shouldConfigureHive);
- } else if (engineType == EngineType.HIVE && actionName.equals("user-hive-job")) {
- decorateHiveAction(cluster, process, action, parentWfPath);
- } else if (FALCON_ACTIONS.contains(actionName)) {
- decorateWithOozieRetries(action);
- if (shouldConfigureHive && actionName.equals("recordsize")) {
- // adds hive-site.xml in actions classpath
- action.getJava().setJobXml("${wf:appPath()}/conf/hive-site.xml");
- }
- }
- }
-
- //Create parent workflow
- marshal(cluster, wfApp, parentWfPath);
- }
-
- protected boolean shouldSetupHiveConfiguration(Cluster cluster,
- Process process) throws FalconException {
- return isTableStorageType(cluster, entity)
- || EngineType.HIVE == process.getWorkflow().getEngine();
- }
-
- protected boolean isTableStorageType(Cluster cluster, Process process) throws FalconException {
- Storage.TYPE storageType = ProcessHelper.getStorageType(cluster, process);
- return Storage.TYPE.TABLE == storageType;
- }
-
- private void setupHiveCredentials(Cluster cluster, Path parentWfPath,
- WORKFLOWAPP wfApp) throws FalconException {
- // create hive-site.xml file so actions can use it in the classpath
- createHiveConfiguration(cluster, parentWfPath, ""); // DO NOT ADD PREFIX!!!
-
- if (isSecurityEnabled) {
- // add hcatalog credentials for secure mode and add a reference to each action
- addHCatalogCredentials(wfApp, cluster, HIVE_CREDENTIAL_NAME, FALCON_PROCESS_HIVE_ACTIONS);
- }
- }
-
- private void decoratePIGAction(Cluster cluster, Process process, PIG pigAction,
- Path parentWfPath, boolean shouldConfigureHive) throws FalconException {
- Path userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent());
- pigAction.setScript("${nameNode}" + userWfPath.toString());
-
- addPrepareDeleteOutputPath(process, pigAction);
-
- final List<String> paramList = pigAction.getParam();
- addInputFeedsAsParams(paramList, process, cluster, EngineType.PIG.name().toLowerCase());
- addOutputFeedsAsParams(paramList, process, cluster);
-
- propagateProcessProperties(pigAction, process);
-
- if (shouldConfigureHive) { // adds hive-site.xml in pig classpath
- pigAction.getFile().add("${wf:appPath()}/conf/hive-site.xml");
- }
-
- addArchiveForCustomJars(cluster, pigAction.getArchive(),
- getUserLibPath(cluster, parentWfPath.getParent()));
- }
-
- private void decorateHiveAction(Cluster cluster, Process process, ACTION wfAction,
- Path parentWfPath) throws FalconException {
-
- JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(wfAction);
- org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
-
- Path userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent());
- hiveAction.setScript("${nameNode}" + userWfPath.toString());
-
- addPrepareDeleteOutputPath(process, hiveAction);
-
- final List<String> paramList = hiveAction.getParam();
- addInputFeedsAsParams(paramList, process, cluster, EngineType.HIVE.name().toLowerCase());
- addOutputFeedsAsParams(paramList, process, cluster);
-
- propagateProcessProperties(hiveAction, process);
-
- // adds hive-site.xml in hive classpath
- hiveAction.setJobXml("${wf:appPath()}/conf/hive-site.xml");
-
- addArchiveForCustomJars(cluster, hiveAction.getArchive(),
- getUserLibPath(cluster, parentWfPath.getParent()));
-
- OozieUtils.marshalHiveAction(wfAction, actionJaxbElement);
- }
-
- private void addPrepareDeleteOutputPath(Process process,
- PIG pigAction) throws FalconException {
- List<String> deleteOutputPathList = getPrepareDeleteOutputPathList(process);
- if (deleteOutputPathList.isEmpty()) {
- return;
- }
-
- final PREPARE prepare = new PREPARE();
- final List<DELETE> deleteList = prepare.getDelete();
-
- for (String deletePath : deleteOutputPathList) {
- final DELETE delete = new DELETE();
- delete.setPath(deletePath);
- deleteList.add(delete);
- }
-
- if (!deleteList.isEmpty()) {
- pigAction.setPrepare(prepare);
- }
- }
-
- private void addPrepareDeleteOutputPath(Process process, org.apache.falcon.oozie.hive.ACTION hiveAction)
- throws FalconException {
-
- List<String> deleteOutputPathList = getPrepareDeleteOutputPathList(process);
- if (deleteOutputPathList.isEmpty()) {
- return;
- }
-
- org.apache.falcon.oozie.hive.PREPARE prepare = new org.apache.falcon.oozie.hive.PREPARE();
- List<org.apache.falcon.oozie.hive.DELETE> deleteList = prepare.getDelete();
-
- for (String deletePath : deleteOutputPathList) {
- org.apache.falcon.oozie.hive.DELETE delete = new org.apache.falcon.oozie.hive.DELETE();
- delete.setPath(deletePath);
- deleteList.add(delete);
- }
-
- if (!deleteList.isEmpty()) {
- hiveAction.setPrepare(prepare);
- }
- }
-
- private List<String> getPrepareDeleteOutputPathList(Process process) throws FalconException {
- final List<String> deleteList = new ArrayList<String>();
- if (process.getOutputs() == null) {
- return deleteList;
- }
-
- for (Output output : process.getOutputs().getOutputs()) {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
-
- if (FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
- continue; // prepare delete only applies to FileSystem storage
- }
-
- deleteList.add("${wf:conf('" + output.getName() + "')}");
- }
-
- return deleteList;
- }
-
- private void addInputFeedsAsParams(List<String> paramList, Process process, Cluster cluster,
- String engineType) throws FalconException {
- if (process.getInputs() == null) {
- return;
- }
-
- for (Input input : process.getInputs().getInputs()) {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
- Storage storage = FeedHelper.createStorage(cluster, feed);
-
- final String inputName = input.getName();
- if (storage.getType() == Storage.TYPE.FILESYSTEM) {
- paramList.add(inputName + "=${" + inputName + "}"); // no prefix for backwards compatibility
- } else if (storage.getType() == Storage.TYPE.TABLE) {
- final String paramName = "falcon_" + inputName; // prefix 'falcon' for new params
- Map<String, String> props = new HashMap<String, String>();
- propagateCommonCatalogTableProperties((CatalogStorage) storage, props, paramName);
- for (String key : props.keySet()) {
- paramList.add(key + "=${wf:conf('" + key + "')}");
- }
-
- paramList.add(paramName + "_filter=${wf:conf('"
- + paramName + "_partition_filter_" + engineType + "')}");
- }
- }
- }
-
- private void addOutputFeedsAsParams(List<String> paramList, Process process,
- Cluster cluster) throws FalconException {
- if (process.getOutputs() == null) {
- return;
- }
-
- for (Output output : process.getOutputs().getOutputs()) {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
- Storage storage = FeedHelper.createStorage(cluster, feed);
-
- if (storage.getType() == Storage.TYPE.FILESYSTEM) {
- final String outputName = output.getName(); // no prefix for backwards compatibility
- paramList.add(outputName + "=${" + outputName + "}");
- } else if (storage.getType() == Storage.TYPE.TABLE) {
- Map<String, String> props = new HashMap<String, String>();
- propagateCatalogTableProperties(output, (CatalogStorage) storage, props); // prefix is auto added
- for (String key : props.keySet()) {
- paramList.add(key + "=${wf:conf('" + key + "')}");
- }
- }
- }
- }
-
- private void propagateProcessProperties(PIG pigAction, Process process) {
- org.apache.falcon.entity.v0.process.Properties processProperties = process.getProperties();
- if (processProperties == null) {
- return;
- }
-
- // Propagate user defined properties to job configuration
- final List<org.apache.falcon.oozie.workflow.CONFIGURATION.Property> configuration =
- pigAction.getConfiguration().getProperty();
-
- // Propagate user defined properties to pig script as macros
- // passed as parameters -p name=value that can be accessed as $name
- final List<String> paramList = pigAction.getParam();
-
- for (org.apache.falcon.entity.v0.process.Property property : processProperties.getProperties()) {
- org.apache.falcon.oozie.workflow.CONFIGURATION.Property configProperty =
- new org.apache.falcon.oozie.workflow.CONFIGURATION.Property();
- configProperty.setName(property.getName());
- configProperty.setValue(property.getValue());
- configuration.add(configProperty);
-
- paramList.add(property.getName() + "=" + property.getValue());
- }
- }
-
- private void propagateProcessProperties(org.apache.falcon.oozie.hive.ACTION hiveAction, Process process) {
- org.apache.falcon.entity.v0.process.Properties processProperties = process.getProperties();
- if (processProperties == null) {
- return;
- }
-
- // Propagate user defined properties to job configuration
- final List<org.apache.falcon.oozie.hive.CONFIGURATION.Property> configuration =
- hiveAction.getConfiguration().getProperty();
-
- // Propagate user defined properties to pig script as macros
- // passed as parameters -p name=value that can be accessed as $name
- final List<String> paramList = hiveAction.getParam();
-
- for (org.apache.falcon.entity.v0.process.Property property : processProperties.getProperties()) {
- org.apache.falcon.oozie.hive.CONFIGURATION.Property configProperty =
- new org.apache.falcon.oozie.hive.CONFIGURATION.Property();
- configProperty.setName(property.getName());
- configProperty.setValue(property.getValue());
- configuration.add(configProperty);
-
- paramList.add(property.getName() + "=" + property.getValue());
- }
- }
-
- private void addArchiveForCustomJars(Cluster cluster, List<String> archiveList,
- Path libPath) throws FalconException {
- if (libPath == null) {
- return;
- }
-
- try {
- final FileSystem fs = libPath.getFileSystem(ClusterHelper.getConfiguration(cluster));
- if (fs.isFile(libPath)) { // File, not a Dir
- archiveList.add(libPath.toString());
- return;
- }
-
- // lib path is a directory, add each file under the lib dir to archive
- final FileStatus[] fileStatuses = fs.listStatus(libPath, new PathFilter() {
- @Override
- public boolean accept(Path path) {
- try {
- return fs.isFile(path) && path.getName().endsWith(".jar");
- } catch (IOException ignore) {
- return false;
- }
- }
- });
-
- for (FileStatus fileStatus : fileStatuses) {
- archiveList.add(fileStatus.getPath().toString());
- }
- } catch (IOException e) {
- throw new FalconException("Error adding archive for custom jars under: " + libPath, e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/main/resources/config/workflow/process-parent-workflow.xml
----------------------------------------------------------------------
diff --git a/process/src/main/resources/config/workflow/process-parent-workflow.xml b/process/src/main/resources/config/workflow/process-parent-workflow.xml
deleted file mode 100644
index 4a2495c..0000000
--- a/process/src/main/resources/config/workflow/process-parent-workflow.xml
+++ /dev/null
@@ -1,278 +0,0 @@
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
-<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-process-parent-workflow'>
- <start to='should-record'/>
- <decision name='should-record'>
- <switch>
- <case to="recordsize">
- ${shouldRecord=="true"}
- </case>
- <default to="user-workflow"/>
- </switch>
- </decision>
- <action name='recordsize'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- <!-- HCatalog jars -->
- <property>
- <name>oozie.action.sharelib.for.java</name>
- <value>hcatalog</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.latedata.LateDataHandler</main-class>
- <arg>-out</arg>
- <arg>${logDir}/latedata/${nominalTime}</arg>
- <arg>-paths</arg>
- <arg>${falconInPaths}</arg>
- <arg>-falconInputFeeds</arg>
- <arg>${falconInputFeeds}</arg>
- <arg>-falconInputFeedStorageTypes</arg>
- <arg>${falconInputFeedStorageTypes}</arg>
- <capture-output/>
- </java>
- <ok to="user-workflow"/>
- <error to="failed-post-processing"/>
- </action>
- <decision name='user-workflow'>
- <switch>
- <case to="user-oozie-workflow">
- ${userWorkflowEngine=="oozie"}
- </case>
- <case to="user-pig-job">
- ${userWorkflowEngine=="pig"}
- </case>
- <case to="user-hive-job">
- ${userWorkflowEngine=="hive"}
- </case>
- <default to="user-oozie-workflow"/>
- </switch>
- </decision>
- <action name='user-pig-job'>
- <pig>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- <property>
- <name>oozie.action.sharelib.for.pig</name>
- <value>pig,hcatalog</value>
- </property>
- </configuration>
- <script>#USER_WF_PATH#</script>
- </pig>
- <ok to="succeeded-post-processing"/>
- <error to="failed-post-processing"/>
- </action>
- <action name="user-hive-job">
- <hive xmlns="uri:oozie:hive-action:0.2">
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <job-xml>${wf:appPath()}/conf/hive-site.xml</job-xml>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <script>#USER_WF_PATH#</script>
- </hive>
- <ok to="succeeded-post-processing"/>
- <error to="failed-post-processing"/>
- </action>
- <action name='user-oozie-workflow'>
- <sub-workflow>
- <app-path>#USER_WF_PATH#</app-path>
- <propagate-configuration/>
- </sub-workflow>
- <ok to="succeeded-post-processing"/>
- <error to="failed-post-processing"/>
- </action>
- <action name='succeeded-post-processing'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
- <arg>-cluster</arg>
- <arg>${cluster}</arg>
- <arg>-entityType</arg>
- <arg>${entityType}</arg>
- <arg>-entityName</arg>
- <arg>${entityName}</arg>
- <arg>-nominalTime</arg>
- <arg>${nominalTime}</arg>
- <arg>-operation</arg>
- <arg>GENERATE</arg>
- <arg>-workflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-runId</arg>
- <arg>${wf:run()}</arg>
- <arg>-status</arg>
- <arg>SUCCEEDED</arg>
- <arg>-timeStamp</arg>
- <arg>${timeStamp}</arg>
- <arg>-brokerImplClass</arg>
- <arg>${wf:conf("broker.impl.class")}</arg>
- <arg>-brokerUrl</arg>
- <arg>${wf:conf("broker.url")}</arg>
- <arg>-userBrokerImplClass</arg>
- <arg>${userBrokerImplClass}</arg>
- <arg>-userBrokerUrl</arg>
- <arg>${userBrokerUrl}</arg>
- <arg>-brokerTTL</arg>
- <arg>${wf:conf("broker.ttlInMins")}</arg>
- <arg>-feedNames</arg>
- <arg>${feedNames}</arg>
- <arg>-feedInstancePaths</arg>
- <arg>${feedInstancePaths}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
- <arg>-workflowEngineUrl</arg>
- <arg>${workflowEngineUrl}</arg>
- <arg>-subflowId</arg>
- <arg>${wf:id()}${userWorkflowEngine == "oozie" ? "@user-oozie-workflow" : ""}</arg>
- <arg>-userWorkflowEngine</arg>
- <arg>${userWorkflowEngine}</arg>
- <arg>-userWorkflowName</arg>
- <arg>${userWorkflowName}</arg>
- <arg>-userWorkflowVersion</arg>
- <arg>${userWorkflowVersion}</arg>
- <arg>-logDir</arg>
- <arg>${logDir}/job-${nominalTime}/</arg>
- <arg>-workflowUser</arg>
- <arg>${wf:user()}</arg>
- <arg>-falconInputFeeds</arg>
- <arg>${falconInputFeeds}</arg>
- <arg>-falconInputPaths</arg>
- <arg>${falconInPaths}</arg>
- <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
- <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
- <file>${wf:conf("falcon.libpath")}/jms.jar</file>
- <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
- <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
- <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
- </java>
- <ok to="end"/>
- <error to="fail"/>
- </action>
- <action name='failed-post-processing'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
- <arg>-cluster</arg>
- <arg>${cluster}</arg>
- <arg>-entityType</arg>
- <arg>${entityType}</arg>
- <arg>-entityName</arg>
- <arg>${entityName}</arg>
- <arg>-nominalTime</arg>
- <arg>${nominalTime}</arg>
- <arg>-operation</arg>
- <arg>GENERATE</arg>
- <arg>-workflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-runId</arg>
- <arg>${wf:run()}</arg>
- <arg>-status</arg>
- <arg>FAILED</arg>
- <arg>-timeStamp</arg>
- <arg>${timeStamp}</arg>
- <arg>-brokerImplClass</arg>
- <arg>${wf:conf("broker.impl.class")}</arg>
- <arg>-brokerUrl</arg>
- <arg>${wf:conf("broker.url")}</arg>
- <arg>-userBrokerImplClass</arg>
- <arg>${userBrokerImplClass}</arg>
- <arg>-userBrokerUrl</arg>
- <arg>${userBrokerUrl}</arg>
- <arg>-brokerTTL</arg>
- <arg>${wf:conf("broker.ttlInMins")}</arg>
- <arg>-feedNames</arg>
- <arg>${feedNames}</arg>
- <arg>-feedInstancePaths</arg>
- <arg>${feedInstancePaths}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}.csv</arg>
- <arg>-workflowEngineUrl</arg>
- <arg>${workflowEngineUrl}</arg>
- <arg>-subflowId</arg>
- <arg>${wf:id()}${userWorkflowEngine == "oozie" ? "@user-oozie-workflow" : ""}</arg>
- <arg>-userWorkflowEngine</arg>
- <arg>${userWorkflowEngine}</arg>
- <arg>-logDir</arg>
- <arg>${logDir}/job-${nominalTime}/</arg>
- <arg>-workflowUser</arg>
- <arg>${wf:user()}</arg>
- <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
- <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
- <file>${wf:conf("falcon.libpath")}/jms.jar</file>
- <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
- <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
- <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
- </java>
- <ok to="fail"/>
- <error to="fail"/>
- </action>
- <kill name="fail">
- <message>
- Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- </message>
- </kill>
- <end name='end'/>
-</workflow-app>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/process/src/test/java/org/apache/falcon/converter/AbstractTestBase.java
----------------------------------------------------------------------
diff --git a/process/src/test/java/org/apache/falcon/converter/AbstractTestBase.java b/process/src/test/java/org/apache/falcon/converter/AbstractTestBase.java
deleted file mode 100644
index 2c7ee8b..0000000
--- a/process/src/test/java/org/apache/falcon/converter/AbstractTestBase.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.falcon.converter;
-
-import org.apache.falcon.entity.store.ConfigurationStore;
-import org.apache.falcon.entity.v0.EntityType;
-import org.apache.falcon.entity.v0.cluster.Cluster;
-import org.apache.falcon.entity.v0.feed.Feed;
-import org.apache.falcon.entity.v0.process.Process;
-
-import javax.xml.bind.Unmarshaller;
-
-/**
- * Base for falcon unit tests involving configuration store.
- */
-public class AbstractTestBase {
- private static final String PROCESS_XML = "/config/process/process-0.1.xml";
- private static final String FEED_XML = "/config/feed/feed-0.1.xml";
- private static final String CLUSTER_XML = "/config/cluster/cluster-0.1.xml";
- private static final String PIG_PROCESS_XML = "/config/process/pig-process-0.1.xml";
-
- protected void storeEntity(EntityType type, String name, String resource) throws Exception {
- Unmarshaller unmarshaller = type.getUnmarshaller();
- ConfigurationStore store = ConfigurationStore.get();
- store.remove(type, name);
- switch (type) {
- case CLUSTER:
- Cluster cluster = (Cluster) unmarshaller.unmarshal(this.getClass().getResource(resource));
- cluster.setName(name);
- store.publish(type, cluster);
- break;
-
- case FEED:
- Feed feed = (Feed) unmarshaller.unmarshal(this.getClass().getResource(resource));
- feed.setName(name);
- store.publish(type, feed);
- break;
-
- case PROCESS:
- Process process = (Process) unmarshaller.unmarshal(this.getClass().getResource(resource));
- process.setName(name);
- store.publish(type, process);
- break;
-
- default:
- }
- }
-
- public void setup() throws Exception {
- storeEntity(EntityType.CLUSTER, "corp", CLUSTER_XML);
- storeEntity(EntityType.FEED, "clicks", FEED_XML);
- storeEntity(EntityType.FEED, "impressions", FEED_XML);
- storeEntity(EntityType.FEED, "clicksummary", FEED_XML);
- storeEntity(EntityType.PROCESS, "clicksummary", PROCESS_XML);
- storeEntity(EntityType.PROCESS, "pig-process", PIG_PROCESS_XML);
- }
-
- public void cleanup() throws Exception {
- ConfigurationStore store = ConfigurationStore.get();
- store.remove(EntityType.PROCESS, "pig-process");
- store.remove(EntityType.PROCESS, "clicksummary");
- store.remove(EntityType.FEED, "clicksummary");
- store.remove(EntityType.FEED, "impressions");
- store.remove(EntityType.FEED, "clicks");
- store.remove(EntityType.CLUSTER, "corp");
- }
-}
[7/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/OozieEntityBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/OozieEntityBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/OozieEntityBuilder.java
new file mode 100644
index 0000000..bb8dfcc
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/OozieEntityBuilder.java
@@ -0,0 +1,306 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.cluster.Property;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.process.Input;
+import org.apache.falcon.entity.v0.process.Output;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.falcon.oozie.feed.FeedBundleBuilder;
+import org.apache.falcon.oozie.process.ProcessBundleBuilder;
+import org.apache.falcon.security.SecurityUtil;
+import org.apache.falcon.service.FalconPathFilter;
+import org.apache.falcon.service.SharedLibraryHostingService;
+import org.apache.falcon.util.StartupProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.Marshaller;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+/**
+ * Base class for building oozie entities - workflow, coordinator and bundle.
+ * @param <T>
+ */
+public abstract class OozieEntityBuilder<T extends Entity> {
+ public static final Logger LOG = LoggerFactory.getLogger(OozieEntityBuilder.class);
+
+ public static final String METASTOREURIS = "hive.metastore.uris";
+ public static final String METASTORE_KERBEROS_PRINCIPAL = "hive.metastore.kerberos.principal";
+ public static final String METASTORE_USE_THRIFT_SASL = "hive.metastore.sasl.enabled";
+
+ public static final String ENTITY_PATH = "ENTITY_PATH";
+ public static final String ENTITY_NAME = "ENTITY_NAME";
+
+ private static final FalconPathFilter FALCON_JAR_FILTER = new FalconPathFilter() {
+ @Override
+ public boolean accept(Path path) {
+ return path.getName().startsWith("falcon");
+ }
+
+ @Override
+ public String getJarName(Path path) {
+ String name = path.getName();
+ if (name.endsWith(".jar")) {
+ name = name.substring(0, name.indexOf(".jar"));
+ }
+ return name;
+ }
+ };
+
+ protected T entity;
+ protected final boolean isSecurityEnabled = SecurityUtil.isSecurityEnabled();
+
+ public OozieEntityBuilder(T entity) {
+ this.entity = entity;
+ }
+
+ public abstract Properties build(Cluster cluster, Path buildPath) throws FalconException;
+
+ protected String getStoragePath(Path path) {
+ if (path != null) {
+ return getStoragePath(path.toString());
+ }
+ return null;
+ }
+
+ protected String getStoragePath(String path) {
+ if (StringUtils.isNotEmpty(path)) {
+ if (new Path(path).toUri().getScheme() == null && !path.startsWith("${nameNode}")) {
+ path = "${nameNode}" + path;
+ }
+ }
+ return path;
+ }
+
+ public static OozieEntityBuilder get(Entity entity) {
+ switch(entity.getEntityType()) {
+ case FEED:
+ return new FeedBundleBuilder((Feed) entity);
+
+ case PROCESS:
+ return new ProcessBundleBuilder((Process)entity);
+
+ default:
+ }
+ throw new IllegalArgumentException("Unhandled type: " + entity.getEntityType());
+ }
+
+ protected void marshal(Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath)
+ throws FalconException {
+ try {
+ Marshaller marshaller = jaxbContext.createMarshaller();
+ marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
+ FileSystem fs = HadoopClientFactory.get().createFileSystem(
+ outPath.toUri(), ClusterHelper.getConfiguration(cluster));
+ OutputStream out = fs.create(outPath);
+ try {
+ marshaller.marshal(jaxbElement, out);
+ } finally {
+ out.close();
+ }
+ if (LOG.isDebugEnabled()) {
+ StringWriter writer = new StringWriter();
+ marshaller.marshal(jaxbElement, writer);
+ LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName());
+ LOG.debug(writer.getBuffer().toString());
+ }
+
+ LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath);
+ } catch (Exception e) {
+ throw new FalconException("Unable to marshall app object", e);
+ }
+ }
+
+ protected boolean isTableStorageType(Cluster cluster) throws FalconException {
+ return entity.getEntityType() == EntityType.PROCESS
+ ? isTableStorageType(cluster, (Process) entity) : isTableStorageType(cluster, (Feed) entity);
+ }
+
+ protected boolean isTableStorageType(Cluster cluster, Feed feed) throws FalconException {
+ Storage.TYPE storageType = FeedHelper.getStorageType(feed, cluster);
+ return Storage.TYPE.TABLE == storageType;
+ }
+
+ protected boolean isTableStorageType(Cluster cluster, Process process) throws FalconException {
+ Storage.TYPE storageType = ProcessHelper.getStorageType(cluster, process);
+ return Storage.TYPE.TABLE == storageType;
+ }
+
+ protected Properties getHiveCredentials(Cluster cluster) {
+ String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
+ if (metaStoreUrl == null) {
+ throw new IllegalStateException(
+ "Registry interface is not defined in cluster: " + cluster.getName());
+ }
+
+ Properties hiveCredentials = new Properties();
+ hiveCredentials.put(METASTOREURIS, metaStoreUrl);
+ hiveCredentials.put("hive.metastore.execute.setugi", "true");
+ hiveCredentials.put("hcatNode", metaStoreUrl.replace("thrift", "hcat"));
+ hiveCredentials.put("hcat.metastore.uri", metaStoreUrl);
+
+ if (isSecurityEnabled) {
+ String principal = ClusterHelper
+ .getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL);
+ hiveCredentials.put(METASTORE_KERBEROS_PRINCIPAL, principal);
+ hiveCredentials.put(METASTORE_USE_THRIFT_SASL, "true");
+ hiveCredentials.put("hcat.metastore.principal", principal);
+ }
+
+ return hiveCredentials;
+ }
+
+ protected Configuration getHiveCredentialsAsConf(Cluster cluster) {
+ Properties hiveCredentials = getHiveCredentials(cluster);
+
+ Configuration hiveConf = new Configuration(false);
+ for (Entry<Object, Object> entry : hiveCredentials.entrySet()) {
+ hiveConf.set((String)entry.getKey(), (String)entry.getValue());
+ }
+
+ return hiveConf;
+ }
+
+ protected Properties getEntityProperties(Entity myEntity) {
+ Properties properties = new Properties();
+ switch (myEntity.getEntityType()) {
+ case CLUSTER:
+ org.apache.falcon.entity.v0.cluster.Properties clusterProps = ((Cluster) myEntity).getProperties();
+ if (clusterProps != null) {
+ for (Property prop : clusterProps.getProperties()) {
+ properties.put(prop.getName(), prop.getValue());
+ }
+ }
+ break;
+
+ case FEED:
+ org.apache.falcon.entity.v0.feed.Properties feedProps = ((Feed) myEntity).getProperties();
+ if (feedProps != null) {
+ for (org.apache.falcon.entity.v0.feed.Property prop : feedProps.getProperties()) {
+ properties.put(prop.getName(), prop.getValue());
+ }
+ }
+ break;
+
+ case PROCESS:
+ org.apache.falcon.entity.v0.process.Properties processProps = ((Process) myEntity).getProperties();
+ if (processProps != null) {
+ for (org.apache.falcon.entity.v0.process.Property prop : processProps.getProperties()) {
+ properties.put(prop.getName(), prop.getValue());
+ }
+ }
+ break;
+
+ default:
+ throw new IllegalArgumentException("Unhandled entity type " + myEntity.getEntityType());
+ }
+ return properties;
+ }
+
+ protected void propagateCatalogTableProperties(Input input, CatalogStorage tableStorage, Properties props) {
+ String prefix = "falcon_" + input.getName();
+
+ propagateCommonCatalogTableProperties(tableStorage, props, prefix);
+
+ props.put(prefix + "_partition_filter_pig",
+ "${coord:dataInPartitionFilter('" + input.getName() + "', 'pig')}");
+ props.put(prefix + "_partition_filter_hive",
+ "${coord:dataInPartitionFilter('" + input.getName() + "', 'hive')}");
+ props.put(prefix + "_partition_filter_java",
+ "${coord:dataInPartitionFilter('" + input.getName() + "', 'java')}");
+ props.put(prefix + "_datain_partitions_hive",
+ "${coord:dataInPartitions('" + input.getName() + "', 'hive-export')}");
+ }
+
+ protected void propagateCatalogTableProperties(Output output, CatalogStorage tableStorage, Properties props) {
+ String prefix = "falcon_" + output.getName();
+
+ propagateCommonCatalogTableProperties(tableStorage, props, prefix);
+
+ //pig and java actions require partition expression as "key1=val1, key2=val2"
+ props.put(prefix + "_partitions_pig",
+ "${coord:dataOutPartitions('" + output.getName() + "')}");
+ props.put(prefix + "_partitions_java",
+ "${coord:dataOutPartitions('" + output.getName() + "')}");
+
+ //hive requires partition expression as "key1='val1', key2='val2'" (with quotes around values)
+ //there is no direct EL expression in oozie
+ List<String> partitions = new ArrayList<String>();
+ for (String key : tableStorage.getDatedPartitionKeys()) {
+ StringBuilder expr = new StringBuilder();
+ expr.append("${coord:dataOutPartitionValue('").append(output.getName()).append("', '").append(key)
+ .append("')}");
+ props.put(prefix + "_dated_partition_value_" + key, expr.toString());
+ partitions.add(key + "='" + expr + "'");
+
+ }
+ props.put(prefix + "_partitions_hive", StringUtils.join(partitions, ","));
+ }
+
+ protected void propagateCommonCatalogTableProperties(CatalogStorage tableStorage, Properties props, String prefix) {
+ props.put(prefix + "_storage_type", tableStorage.getType().name());
+ props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
+ props.put(prefix + "_database", tableStorage.getDatabase());
+ props.put(prefix + "_table", tableStorage.getTable());
+ }
+
+ protected void copySharedLibs(Cluster cluster, Path libPath) throws FalconException {
+ try {
+ SharedLibraryHostingService.pushLibsToHDFS(StartupProperties.get().getProperty("system.lib.location"),
+ libPath, cluster, FALCON_JAR_FILTER);
+ } catch (IOException e) {
+ throw new FalconException("Failed to copy shared libs on cluster " + cluster.getName(), e);
+ }
+ }
+
+ protected Properties getProperties(Path path, String name) {
+ if (path == null) {
+ return null;
+ }
+
+ Properties prop = new Properties();
+ prop.setProperty(OozieEntityBuilder.ENTITY_PATH, getStoragePath(path));
+ prop.setProperty(OozieEntityBuilder.ENTITY_NAME, name);
+ return prop;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/OozieOrchestrationWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/OozieOrchestrationWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/OozieOrchestrationWorkflowBuilder.java
new file mode 100644
index 0000000..ac78297
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/OozieOrchestrationWorkflowBuilder.java
@@ -0,0 +1,302 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.falcon.oozie.feed.FeedReplicationWorkflowBuilder;
+import org.apache.falcon.oozie.feed.FeedRetentionWorkflowBuilder;
+import org.apache.falcon.oozie.process.HiveProcessWorkflowBuilder;
+import org.apache.falcon.oozie.process.OozieProcessWorkflowBuilder;
+import org.apache.falcon.oozie.process.PigProcessWorkflowBuilder;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.falcon.oozie.workflow.CREDENTIAL;
+import org.apache.falcon.oozie.workflow.CREDENTIALS;
+import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
+import org.apache.falcon.security.SecurityUtil;
+import org.apache.falcon.util.OozieUtils;
+import org.apache.falcon.util.RuntimeProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Unmarshaller;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
+/**
+ * Base class for building the oozie orchestration workflow (workflow.xml)
+ * for an entity. Concrete subclasses exist per feed lifecycle
+ * (retention/replication) and per process workflow engine (pig/oozie/hive).
+ * @param <T> the entity type (Feed or Process) this builder operates on
+ */
+public abstract class OozieOrchestrationWorkflowBuilder<T extends Entity> extends OozieEntityBuilder<T> {
+ protected static final String HIVE_CREDENTIAL_NAME = "falconHiveAuth";
+ // falcon-owned workflow actions that get oozie retry settings applied
+ public static final Set<String> FALCON_ACTIONS = new HashSet<String>(
+ Arrays.asList(new String[]{"recordsize", "succeeded-post-processing", "failed-post-processing", }));
+ // lifecycle tag (e.g. RETENTION/REPLICATION) this workflow is built for
+ private final Tag lifecycle;
+
+ public OozieOrchestrationWorkflowBuilder(T entity, Tag lifecycle) {
+ super(entity);
+ this.lifecycle = lifecycle;
+ }
+
+ /**
+ * Factory: returns the workflow builder for the given entity and
+ * lifecycle. Feeds dispatch on the lifecycle tag; processes dispatch on
+ * the workflow engine declared in the process definition.
+ *
+ * @throws IllegalArgumentException for unhandled entity/lifecycle combos
+ */
+ public static final OozieOrchestrationWorkflowBuilder get(Entity entity, Tag lifecycle) {
+ switch(entity.getEntityType()) {
+ case FEED:
+ Feed feed = (Feed) entity;
+ switch (lifecycle) {
+ case RETENTION:
+ return new FeedRetentionWorkflowBuilder(feed);
+
+ case REPLICATION:
+ return new FeedReplicationWorkflowBuilder(feed);
+
+ default:
+ throw new IllegalArgumentException("Unhandled type " + entity.getEntityType() + ", lifecycle "
+ + lifecycle);
+ }
+
+ case PROCESS:
+ Process process = (Process) entity;
+ switch(process.getWorkflow().getEngine()) {
+ case PIG:
+ return new PigProcessWorkflowBuilder(process);
+
+ case OOZIE:
+ return new OozieProcessWorkflowBuilder(process);
+
+ case HIVE:
+ return new HiveProcessWorkflowBuilder(process);
+
+ default:
+ // unknown engine: fall through to the trailing throw below
+ break;
+ }
+
+ default:
+ // intentional fall-through to the common failure below
+ }
+
+ throw new IllegalArgumentException("Unhandled type " + entity.getEntityType() + ", lifecycle " + lifecycle);
+ }
+
+ // serializes the workflow app as workflow.xml under outPath on the cluster
+ protected void marshal(Cluster cluster, WORKFLOWAPP workflow, Path outPath) throws FalconException {
+ marshal(cluster, new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflow),
+ OozieUtils.WORKFLOW_JAXB_CONTEXT, new Path(outPath, "workflow.xml"));
+ }
+
+ /**
+ * Unmarshals the named workflow template (a classpath resource) into a
+ * WORKFLOWAPP object; the stream is always closed.
+ *
+ * @throws FalconException wrapping any JAXB unmarshalling failure
+ */
+ protected WORKFLOWAPP getWorkflow(String template) throws FalconException {
+ InputStream resourceAsStream = null;
+ try {
+ resourceAsStream = OozieOrchestrationWorkflowBuilder.class.getResourceAsStream(template);
+ Unmarshaller unmarshaller = OozieUtils.WORKFLOW_JAXB_CONTEXT.createUnmarshaller();
+ @SuppressWarnings("unchecked")
+ JAXBElement<WORKFLOWAPP> jaxbElement = (JAXBElement<WORKFLOWAPP>) unmarshaller.unmarshal(resourceAsStream);
+ return jaxbElement.getValue();
+ } catch (JAXBException e) {
+ throw new FalconException(e);
+ } finally {
+ IOUtils.closeQuietly(resourceAsStream);
+ }
+ }
+
+ /**
+ * Adds any jars found under <working>/libext (and its entity-type and
+ * tag-specific subdirs) on the cluster as <file> entries to the workflow's
+ * java/pig/map-reduce actions.
+ */
+ protected void addLibExtensionsToWorkflow(Cluster cluster, WORKFLOWAPP wf, Tag tag)
+ throws FalconException {
+ String libext = ClusterHelper.getLocation(cluster, "working") + "/libext";
+ FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
+ try {
+ addExtensionJars(fs, new Path(libext), wf);
+ addExtensionJars(fs, new Path(libext, entity.getEntityType().name()), wf);
+ if (tag != null) {
+ addExtensionJars(fs,
+ new Path(libext, entity.getEntityType().name() + "/" + tag.name().toLowerCase()), wf);
+ }
+ } catch(IOException e) {
+ throw new FalconException(e);
+ }
+ }
+
+ // registers every non-directory file under path as a <file> on each
+ // java/pig/map-reduce action in the workflow; missing dirs are a no-op
+ private void addExtensionJars(FileSystem fs, Path path, WORKFLOWAPP wf) throws IOException {
+ FileStatus[] libs = null;
+ try {
+ libs = fs.listStatus(path);
+ } catch(FileNotFoundException ignore) {
+ //Ok if the libext is not configured
+ }
+
+ if (libs == null) {
+ return;
+ }
+
+ for(FileStatus lib : libs) {
+ if (lib.isDir()) {
+ continue;
+ }
+
+ for(Object obj: wf.getDecisionOrForkOrJoin()) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+ ACTION action = (ACTION) obj;
+ List<String> files = null;
+ if (action.getJava() != null) {
+ files = action.getJava().getFile();
+ } else if (action.getPig() != null) {
+ files = action.getPig().getFile();
+ } else if (action.getMapReduce() != null) {
+ files = action.getMapReduce().getFile();
+ }
+ if (files != null) {
+ files.add(lib.getPath().toString());
+ }
+ }
+ }
+ }
+
+ // creates hive-site.xml configuration in conf dir for the given cluster on the same cluster.
+ protected void createHiveConfiguration(Cluster cluster, Path workflowPath, String prefix) throws FalconException {
+ Configuration hiveConf = getHiveCredentialsAsConf(cluster);
+
+ try {
+ Configuration conf = ClusterHelper.getConfiguration(cluster);
+ FileSystem fs = HadoopClientFactory.get().createFileSystem(conf);
+
+ // create hive conf to stagingDir
+ Path confPath = new Path(workflowPath + "/conf");
+
+ persistHiveConfiguration(fs, confPath, hiveConf, prefix);
+ } catch (IOException e) {
+ // NOTE(review): message has a duplicated word ("create create") -- fix in code
+ throw new FalconException("Unable to create create hive site", e);
+ }
+ }
+
+ // writes hiveConf as <prefix>hive-site.xml under confPath; stream always closed
+ private void persistHiveConfiguration(FileSystem fs, Path confPath, Configuration hiveConf,
+ String prefix) throws IOException {
+ OutputStream out = null;
+ try {
+ out = fs.create(new Path(confPath, prefix + "hive-site.xml"));
+ hiveConf.writeXml(out);
+ } finally {
+ IOUtils.closeQuietly(out);
+ }
+ }
+
+ /**
+ * Adds an hcat credential with the given name to the workflow app's
+ * credentials section. This is only necessary if table is involved and
+ * is secure mode.
+ *
+ * @param workflowApp workflow xml
+ * @param cluster cluster entity
+ */
+ protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName) {
+ CREDENTIALS credentials = workflowApp.getCredentials();
+ if (credentials == null) {
+ credentials = new CREDENTIALS();
+ }
+
+ credentials.getCredential().add(createHCatalogCredential(cluster, credentialName));
+
+ // add credential for workflow
+ workflowApp.setCredentials(credentials);
+ }
+
+ /**
+ * Adds the hcat credential to the workflow app and then tags each listed
+ * action (by name) with that credential. This is only necessary if table
+ * is involved and is secure mode.
+ *
+ * @param workflowApp workflow xml
+ * @param cluster cluster entity
+ */
+ protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster,
+ String credentialName, Set<String> actions) {
+ addHCatalogCredentials(workflowApp, cluster, credentialName);
+
+ // add credential to each action
+ for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
+ if (!(object instanceof ACTION)) {
+ continue;
+ }
+
+ ACTION action = (ACTION) object;
+ String actionName = action.getName();
+ if (actions.contains(actionName)) {
+ action.setCred(credentialName);
+ }
+ }
+ }
+
+ /**
+ * Builds an oozie "hcat" credential carrying the cluster's metastore uri
+ * and principal. This is only necessary if table is involved and is
+ * secure mode.
+ *
+ * @param cluster cluster entity
+ * @param credentialName credential name
+ * @return CREDENTIALS object
+ */
+ private CREDENTIAL createHCatalogCredential(Cluster cluster, String credentialName) {
+ final String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
+
+ CREDENTIAL credential = new CREDENTIAL();
+ credential.setName(credentialName);
+ credential.setType("hcat");
+
+ credential.getProperty().add(createProperty("hcat.metastore.uri", metaStoreUrl));
+ credential.getProperty().add(createProperty("hcat.metastore.principal",
+ ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL)));
+
+ return credential;
+ }
+
+ // small helper to build a CREDENTIAL name/value property pair
+ private CREDENTIAL.Property createProperty(String name, String value) {
+ CREDENTIAL.Property property = new CREDENTIAL.Property();
+ property.setName(name);
+ property.setValue(value);
+ return property;
+ }
+
+ // applies oozie retry settings to every action named in FALCON_ACTIONS
+ protected void addOozieRetries(WORKFLOWAPP workflow) {
+ for (Object object : workflow.getDecisionOrForkOrJoin()) {
+ if (!(object instanceof org.apache.falcon.oozie.workflow.ACTION)) {
+ continue;
+ }
+ org.apache.falcon.oozie.workflow.ACTION action = (org.apache.falcon.oozie.workflow.ACTION) object;
+ String actionName = action.getName();
+ if (FALCON_ACTIONS.contains(actionName)) {
+ decorateWithOozieRetries(action);
+ }
+ }
+ }
+
+ // retry max/interval come from runtime properties, with defaults 3 and 1s
+ protected void decorateWithOozieRetries(ACTION action) {
+ Properties props = RuntimeProperties.get();
+ action.setRetryMax(props.getProperty("falcon.parentworkflow.retry.max", "3"));
+ action.setRetryInterval(props.getProperty("falcon.parentworkflow.retry.interval.secs", "1"));
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedBundleBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedBundleBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedBundleBuilder.java
new file mode 100644
index 0000000..6917f4e
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedBundleBuilder.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.feed;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.oozie.OozieBundleBuilder;
+import org.apache.falcon.oozie.OozieCoordinatorBuilder;
+import org.apache.hadoop.fs.Path;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Builds the oozie bundle for a feed: a retention coordinator plus any
+ * replication coordinators for the given cluster, copying falcon's shared
+ * libs alongside when at least one coordinator was produced.
+ */
+public class FeedBundleBuilder extends OozieBundleBuilder<Feed> {
+ public FeedBundleBuilder(Feed entity) {
+ super(entity);
+ }
+
+ // lib dir sits directly under the bundle's build path
+ @Override protected Path getLibPath(Cluster cluster, Path buildPath) {
+ return new Path(buildPath, "lib");
+ }
+
+ /**
+ * Builds the retention and replication coordinators under buildPath and
+ * returns their properties; either builder may return null (nothing to
+ * build), in which case its results are skipped.
+ */
+ @Override protected List<Properties> doBuild(Cluster cluster, Path buildPath) throws FalconException {
+ List<Properties> props = new ArrayList<Properties>();
+ List<Properties> evictionProps =
+ OozieCoordinatorBuilder.get(entity, Tag.RETENTION).buildCoords(cluster, buildPath);
+ if (evictionProps != null) {
+ props.addAll(evictionProps);
+ }
+
+ List<Properties> replicationProps = OozieCoordinatorBuilder.get(entity, Tag.REPLICATION).buildCoords(cluster,
+ buildPath);
+ if (replicationProps != null) {
+ props.addAll(replicationProps);
+ }
+
+ if (!props.isEmpty()) {
+ copySharedLibs(cluster, getLibPath(cluster, buildPath));
+ }
+
+ return props;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationCoordinatorBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationCoordinatorBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationCoordinatorBuilder.java
new file mode 100644
index 0000000..3226cf2
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationCoordinatorBuilder.java
@@ -0,0 +1,418 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.feed;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.Frequency;
+import org.apache.falcon.entity.v0.SchemaHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.ClusterType;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.expression.ExpressionHelper;
+import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
+import org.apache.falcon.oozie.OozieCoordinatorBuilder;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.falcon.oozie.coordinator.SYNCDATASET;
+import org.apache.falcon.oozie.coordinator.WORKFLOW;
+import org.apache.falcon.oozie.coordinator.ACTION;
+import org.apache.falcon.util.RuntimeProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Builds oozie coordinator for feed replication, one per source-target cluster combination.
+ */
+public class FeedReplicationCoordinatorBuilder extends OozieCoordinatorBuilder<Feed> {
+ private static final String REPLICATION_COORD_TEMPLATE = "/coordinator/replication-coordinator.xml";
+ // minimum coordinator timeout, in millis
+ private static final int THIRTY_MINUTES = 30 * 60 * 1000;
+
+ // entity property names that override coordinator controls
+ private static final String PARALLEL = "parallel";
+ private static final String TIMEOUT = "timeout";
+ // distcp tuning knobs; defaults come from runtime properties
+ private static final String MR_MAX_MAPS = "maxMaps";
+ private static final String MR_MAP_BANDWIDTH = "mapBandwidthKB";
+
+ public FeedReplicationCoordinatorBuilder(Feed entity) {
+ super(entity, Tag.REPLICATION);
+ }
+
+ /**
+ * For a target cluster, builds one replication coordinator per source
+ * cluster of the feed (each with its own workflow, since hive credentials
+ * vary per cluster); returns null when this cluster is not a target.
+ */
+ @Override public List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException {
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(entity, cluster.getName());
+ if (feedCluster.getType() == ClusterType.TARGET) {
+ List<Properties> props = new ArrayList<Properties>();
+ OozieOrchestrationWorkflowBuilder builder = OozieOrchestrationWorkflowBuilder.get(entity, Tag.REPLICATION);
+ for (org.apache.falcon.entity.v0.feed.Cluster srcFeedCluster : entity.getClusters().getClusters()) {
+
+ if (srcFeedCluster.getType() == ClusterType.SOURCE) {
+ Cluster srcCluster = ConfigurationStore.get().get(EntityType.CLUSTER, srcFeedCluster.getName());
+ // workflow is serialized to a specific dir
+ Path coordPath = new Path(buildPath, Tag.REPLICATION.name() + "/" + srcCluster.getName());
+
+ // Different workflow for each source since hive credentials vary for each cluster
+ builder.build(cluster, coordPath);
+
+ // NOTE(review): doBuild may return null (no date overlap);
+ // the null is still added to the list -- callers should filter
+ props.add(doBuild(srcCluster, cluster, coordPath));
+ }
+ }
+ return props;
+ }
+ return null;
+ }
+
+ /**
+ * Builds a single replication coordinator for one source/target pair:
+ * computes the overlapping validity window (shifted by replication
+ * delay), wires input/output datasets and the workflow action, marshals
+ * the coordinator and returns its properties. Returns null when the
+ * source and target validity windows do not overlap.
+ */
+ private Properties doBuild(Cluster srcCluster, Cluster trgCluster, Path buildPath) throws FalconException {
+ long replicationDelayInMillis = getReplicationDelayInMillis(srcCluster);
+ Date sourceStartDate = getStartDate(srcCluster, replicationDelayInMillis);
+ Date sourceEndDate = getEndDate(srcCluster);
+
+ Date targetStartDate = getStartDate(trgCluster, replicationDelayInMillis);
+ Date targetEndDate = getEndDate(trgCluster);
+
+ if (noOverlapExists(sourceStartDate, sourceEndDate,
+ targetStartDate, targetEndDate)) {
+ LOG.warn("Not creating replication coordinator, as the source cluster: {} and target cluster: {} do "
+ + "not have overlapping dates", srcCluster.getName(), trgCluster.getName());
+ return null;
+ }
+
+ COORDINATORAPP coord = getCoordinatorTemplate(REPLICATION_COORD_TEMPLATE);
+
+ String coordName = EntityUtil.getWorkflowName(Tag.REPLICATION, Arrays.asList(srcCluster.getName()),
+ entity).toString();
+ // window is the intersection: later start, earlier end
+ String start = sourceStartDate.after(targetStartDate)
+ ? SchemaHelper.formatDateUTC(sourceStartDate) : SchemaHelper.formatDateUTC(targetStartDate);
+ String end = sourceEndDate.before(targetEndDate)
+ ? SchemaHelper.formatDateUTC(sourceEndDate) : SchemaHelper.formatDateUTC(targetEndDate);
+
+ initializeCoordAttributes(coord, coordName, start, end, replicationDelayInMillis);
+ setCoordControls(coord);
+
+ final Storage sourceStorage = FeedHelper.createReadOnlyStorage(srcCluster, entity);
+ initializeInputDataSet(srcCluster, coord, sourceStorage);
+
+ final Storage targetStorage = FeedHelper.createStorage(trgCluster, entity);
+ initializeOutputDataSet(trgCluster, coord, targetStorage);
+
+ ACTION replicationWorkflowAction = getReplicationWorkflowAction(
+ srcCluster, trgCluster, buildPath, coordName, sourceStorage, targetStorage);
+ coord.setAction(replicationWorkflowAction);
+
+ marshal(trgCluster, coord, buildPath);
+ return getProperties(buildPath, coordName);
+ }
+
+ /**
+ * Builds the coordinator's workflow ACTION with all configuration
+ * properties required by the replication workflow: distcp tuning, storage
+ * type specific copy properties (filesystem vs catalog table), and late
+ * data / post-processing properties.
+ */
+ private ACTION getReplicationWorkflowAction(Cluster srcCluster, Cluster trgCluster, Path buildPath,
+ String wfName, Storage sourceStorage, Storage targetStorage) throws FalconException {
+ ACTION action = new ACTION();
+ WORKFLOW workflow = new WORKFLOW();
+
+ workflow.setAppPath(getStoragePath(buildPath.toString()));
+ Properties props = createCoordDefaultConfiguration(trgCluster, wfName);
+ props.put("srcClusterName", srcCluster.getName());
+ props.put("srcClusterColo", srcCluster.getColo());
+ if (props.get(MR_MAX_MAPS) == null) { // set default if user has not overridden
+ props.put(MR_MAX_MAPS, getDefaultMaxMaps());
+ }
+ if (props.get(MR_MAP_BANDWIDTH) == null) { // set default if user has not overridden
+ props.put(MR_MAP_BANDWIDTH, getDefaultMapBandwidth());
+ }
+
+ // the storage type is uniform across source and target feeds for replication
+ props.put("falconFeedStorageType", sourceStorage.getType().name());
+
+ String instancePaths = "";
+ if (sourceStorage.getType() == Storage.TYPE.FILESYSTEM) {
+ String pathsWithPartitions = getPathsWithPartitions(srcCluster, trgCluster);
+ instancePaths = pathsWithPartitions;
+
+ propagateFileSystemCopyProperties(pathsWithPartitions, props);
+ } else if (sourceStorage.getType() == Storage.TYPE.TABLE) {
+ instancePaths = "${coord:dataIn('input')}";
+ final CatalogStorage sourceTableStorage = (CatalogStorage) sourceStorage;
+ propagateTableStorageProperties(srcCluster, sourceTableStorage, props, "falconSource");
+ final CatalogStorage targetTableStorage = (CatalogStorage) targetStorage;
+ propagateTableStorageProperties(trgCluster, targetTableStorage, props, "falconTarget");
+ propagateTableCopyProperties(srcCluster, sourceTableStorage, trgCluster, targetTableStorage, props);
+ setupHiveConfiguration(srcCluster, trgCluster, buildPath);
+ }
+
+ propagateLateDataProperties(instancePaths, sourceStorage.getType().name(), props);
+ props.putAll(FeedHelper.getUserWorkflowProperties("replication"));
+
+ workflow.setConfiguration(getConfig(props));
+ action.setWorkflow(workflow);
+
+ return action;
+ }
+
+ // default distcp max maps, overridable via runtime properties
+ private String getDefaultMaxMaps() {
+ return RuntimeProperties.get().getProperty("falcon.replication.workflow.maxmaps", "5");
+ }
+
+ // default distcp per-map bandwidth in KB, overridable via runtime properties
+ private String getDefaultMapBandwidth() {
+ return RuntimeProperties.get().getProperty("falcon.replication.workflow.mapbandwidthKB", "102400");
+ }
+
+ /**
+ * Combines "${coord:dataIn('input')}" with the normalized source/target
+ * partition expressions, collapsing duplicate and trailing slashes.
+ */
+ private String getPathsWithPartitions(Cluster srcCluster, Cluster trgCluster) throws FalconException {
+ String srcPart = FeedHelper.normalizePartitionExpression(
+ FeedHelper.getCluster(entity, srcCluster.getName()).getPartition());
+ srcPart = FeedHelper.evaluateClusterExp(srcCluster, srcPart);
+
+ String targetPart = FeedHelper.normalizePartitionExpression(
+ FeedHelper.getCluster(entity, trgCluster.getName()).getPartition());
+ targetPart = FeedHelper.evaluateClusterExp(trgCluster, targetPart);
+
+ StringBuilder pathsWithPartitions = new StringBuilder();
+ pathsWithPartitions.append("${coord:dataIn('input')}/")
+ .append(FeedHelper.normalizePartitionExpression(srcPart, targetPart));
+
+ String parts = pathsWithPartitions.toString().replaceAll("//+", "/");
+ parts = StringUtils.stripEnd(parts, "/");
+ return parts;
+ }
+
+ // filesystem replication: distcp copies dataIn('input') to dataOut('output')
+ private void propagateFileSystemCopyProperties(String paths, Properties props) throws FalconException {
+ props.put("sourceRelativePaths", paths);
+
+ props.put("distcpSourcePaths", "${coord:dataIn('input')}");
+ props.put("distcpTargetPaths", "${coord:dataOut('output')}");
+ }
+
+ /**
+ * Propagates per-cluster catalog storage properties (namenode, job
+ * tracker, hcat endpoint, database/table/partition) under the given
+ * prefix ("falconSource"/"falconTarget").
+ */
+ private void propagateTableStorageProperties(Cluster cluster, CatalogStorage tableStorage,
+ Properties props, String prefix) {
+ props.put(prefix + "NameNode", ClusterHelper.getStorageUrl(cluster));
+ props.put(prefix + "JobTracker", ClusterHelper.getMREndPoint(cluster));
+ props.put(prefix + "HcatNode", tableStorage.getCatalogUrl());
+
+ props.put(prefix + "Database", tableStorage.getDatabase());
+ props.put(prefix + "Table", tableStorage.getTable());
+ props.put(prefix + "Partition", "${coord:dataInPartitions('input', 'hive-export')}");
+ }
+
+ /**
+ * Table replication: distcp copies between per-instance export/import
+ * staging dirs computed on the source and target clusters.
+ */
+ private void propagateTableCopyProperties(Cluster srcCluster, CatalogStorage sourceStorage,
+ Cluster trgCluster, CatalogStorage targetStorage, Properties props) {
+ // create staging dirs for export at source & set it as distcpSourcePaths
+ String sourceStagingPath =
+ FeedHelper.getStagingPath(srcCluster, entity, sourceStorage, Tag.REPLICATION,
+ NOMINAL_TIME_EL + "/" + trgCluster.getName());
+ props.put("distcpSourcePaths", sourceStagingPath);
+
+ // create staging dirs for import at target & set it as distcpTargetPaths
+ String targetStagingPath =
+ FeedHelper.getStagingPath(trgCluster, entity, targetStorage, Tag.REPLICATION,
+ NOMINAL_TIME_EL + "/" + trgCluster.getName());
+ props.put("distcpTargetPaths", targetStagingPath);
+
+ props.put("sourceRelativePaths", IGNORE); // this will not be used for Table storage.
+ }
+
+ // properties consumed by the late data handler and falcon post-processing
+ private void propagateLateDataProperties(String instancePaths, String falconFeedStorageType, Properties props) {
+ // todo these pairs are the same but used in different context
+ // late data handler - should-record action
+ props.put("falconInputFeeds", entity.getName());
+ props.put("falconInPaths", instancePaths);
+
+ // storage type for each corresponding feed - in this case only one feed is involved
+ // needed to compute usage based on storage type in LateDataHandler
+ props.put("falconInputFeedStorageTypes", falconFeedStorageType);
+
+ // falcon post processing
+ props.put(ARG.feedNames.getPropName(), entity.getName());
+ props.put(ARG.feedInstancePaths.getPropName(), "${coord:dataOut('output')}");
+ }
+
+ /**
+ * Stages the hive export/import scripts and both clusters' hive-site
+ * configs under the coordinator's build path on the target cluster.
+ */
+ private void setupHiveConfiguration(Cluster srcCluster, Cluster trgCluster, Path buildPath) throws FalconException {
+ Configuration conf = ClusterHelper.getConfiguration(trgCluster);
+ FileSystem fs = HadoopClientFactory.get().createFileSystem(conf);
+
+ try {
+ // copy import export scripts to stagingDir
+ Path scriptPath = new Path(buildPath, "scripts");
+ copyHiveScript(fs, scriptPath, "/workflow/", "falcon-table-export.hql");
+ copyHiveScript(fs, scriptPath, "/workflow/", "falcon-table-import.hql");
+
+ // create hive conf to stagingDir
+ Path confPath = new Path(buildPath + "/conf");
+ persistHiveConfiguration(fs, confPath, srcCluster, "falcon-source-");
+ persistHiveConfiguration(fs, confPath, trgCluster, "falcon-target-");
+ } catch (IOException e) {
+ throw new FalconException("Unable to create hive conf files", e);
+ }
+ }
+
+ // copies a classpath hive script resource to scriptPath on HDFS; both
+ // streams are always closed
+ private void copyHiveScript(FileSystem fs, Path scriptPath, String localScriptPath,
+ String scriptName) throws IOException {
+ OutputStream out = null;
+ InputStream in = null;
+ try {
+ out = fs.create(new Path(scriptPath, scriptName));
+ in = FeedReplicationCoordinatorBuilder.class.getResourceAsStream(localScriptPath + scriptName);
+ IOUtils.copy(in, out);
+ } finally {
+ IOUtils.closeQuietly(in);
+ IOUtils.closeQuietly(out);
+ }
+ }
+
+ // writes the cluster's hive credentials as <prefix>hive-site.xml under confPath
+ protected void persistHiveConfiguration(FileSystem fs, Path confPath,
+ Cluster cluster, String prefix) throws IOException {
+ Configuration hiveConf = getHiveCredentialsAsConf(cluster);
+ OutputStream out = null;
+ try {
+ out = fs.create(new Path(confPath, prefix + "hive-site.xml"));
+ hiveConf.writeXml(out);
+ } finally {
+ IOUtils.closeQuietly(out);
+ }
+ }
+
+ /**
+ * Sets the coordinator name, frequency, window and timezone; when a
+ * replication delay is configured, shifts the input/output instance EL
+ * expressions back by the delay (expressed in minutes via now()).
+ */
+ private void initializeCoordAttributes(COORDINATORAPP coord, String coordName, String start, String end,
+ long delayInMillis) {
+ coord.setName(coordName);
+ coord.setFrequency("${coord:" + entity.getFrequency().toString() + "}");
+
+ if (delayInMillis > 0) {
+ long delayInMins = -1 * delayInMillis / (1000 * 60);
+ String elExp = "${now(0," + delayInMins + ")}";
+
+ coord.getInputEvents().getDataIn().get(0).getInstance().set(0, elExp);
+ coord.getOutputEvents().getDataOut().get(0).setInstance(elExp);
+ }
+
+ coord.setStart(start);
+ coord.setEnd(end);
+ coord.setTimezone(entity.getTimezone().getID());
+ }
+
+ /**
+ * Sets coordinator controls: timeout defaults to 6x the feed frequency
+ * (min 30 minutes) unless overridden by the "timeout" entity property;
+ * throttle derives from timeout/frequency; concurrency comes from the
+ * "parallel" entity property (default 1). Bad overrides are logged and
+ * ignored.
+ */
+ private void setCoordControls(COORDINATORAPP coord) throws FalconException {
+ long frequencyInMillis = ExpressionHelper.get().evaluate(entity.getFrequency().toString(), Long.class);
+ long timeoutInMillis = frequencyInMillis * 6;
+ if (timeoutInMillis < THIRTY_MINUTES) {
+ timeoutInMillis = THIRTY_MINUTES;
+ }
+
+ Properties props = getEntityProperties(entity);
+ String timeout = props.getProperty(TIMEOUT);
+ if (timeout!=null) {
+ try{
+ timeoutInMillis= ExpressionHelper.get().evaluate(timeout, Long.class);
+ } catch (Exception ignore) {
+ LOG.error("Unable to evaluate timeout:", ignore);
+ }
+ }
+ coord.getControls().setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));
+ coord.getControls().setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
+
+ String parallelProp = props.getProperty(PARALLEL);
+ int parallel = 1;
+ if (parallelProp != null) {
+ try {
+ parallel = Integer.parseInt(parallelProp);
+ } catch (NumberFormatException ignore) {
+ LOG.error("Unable to parse parallel:", ignore);
+ }
+ }
+ coord.getControls().setConcurrency(String.valueOf(parallel));
+ }
+
+
+ /**
+ * Populates the first (input) dataset in the coordinator template from
+ * the source storage: uri template (thrift -> hcat for oozie), instance
+ * attributes, and the feed's availability flag as done-flag.
+ */
+ private void initializeInputDataSet(Cluster cluster, COORDINATORAPP coord, Storage storage) throws FalconException {
+ SYNCDATASET inputDataset = (SYNCDATASET)coord.getDatasets().getDatasetOrAsyncDataset().get(0);
+
+ String uriTemplate = storage.getUriTemplate(LocationType.DATA);
+ if (storage.getType() == Storage.TYPE.TABLE) {
+ uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
+ }
+ inputDataset.setUriTemplate(uriTemplate);
+
+ setDatasetValues(inputDataset, cluster);
+
+ if (entity.getAvailabilityFlag() == null) {
+ inputDataset.setDoneFlag("");
+ } else {
+ inputDataset.setDoneFlag(entity.getAvailabilityFlag());
+ }
+ }
+
+ /**
+ * Populates the second (output) dataset in the coordinator template from
+ * the target storage; no done-flag is set on the output side.
+ */
+ private void initializeOutputDataSet(Cluster cluster, COORDINATORAPP coord,
+ Storage storage) throws FalconException {
+ SYNCDATASET outputDataset = (SYNCDATASET)coord.getDatasets().getDatasetOrAsyncDataset().get(1);
+
+ String uriTemplate = storage.getUriTemplate(LocationType.DATA);
+ if (storage.getType() == Storage.TYPE.TABLE) {
+ uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
+ }
+ outputDataset.setUriTemplate(uriTemplate);
+
+ setDatasetValues(outputDataset, cluster);
+ }
+
+ // common dataset attributes: initial instance from the feed cluster's
+ // validity start, plus the feed's timezone and frequency
+ private void setDatasetValues(SYNCDATASET dataset, Cluster cluster) {
+ dataset.setInitialInstance(SchemaHelper.formatDateUTC(
+ FeedHelper.getCluster(entity, cluster.getName()).getValidity().getStart()));
+ dataset.setTimezone(entity.getTimezone().getID());
+ dataset.setFrequency("${coord:" + entity.getFrequency().toString() + "}");
+ }
+
+ // evaluates the feed cluster's configured replication delay; 0 when unset
+ private long getReplicationDelayInMillis(Cluster srcCluster) throws FalconException {
+ Frequency replicationDelay = FeedHelper.getCluster(entity, srcCluster.getName()).getDelay();
+ long delayInMillis=0;
+ if (replicationDelay != null) {
+ delayInMillis = ExpressionHelper.get().evaluate(
+ replicationDelay.toString(), Long.class);
+ }
+
+ return delayInMillis;
+ }
+
+ // validity start shifted forward by the replication delay (if any)
+ private Date getStartDate(Cluster cluster, long replicationDelayInMillis) {
+ Date startDate = FeedHelper.getCluster(entity, cluster.getName()).getValidity().getStart();
+ return replicationDelayInMillis == 0 ? startDate : new Date(startDate.getTime() + replicationDelayInMillis);
+ }
+
+ // validity end of the feed on the given cluster
+ private Date getEndDate(Cluster cluster) {
+ return FeedHelper.getCluster(entity, cluster.getName()).getValidity().getEnd();
+ }
+
+ // true when the [start, end] windows of source and target do not intersect
+ private boolean noOverlapExists(Date sourceStartDate, Date sourceEndDate,
+ Date targetStartDate, Date targetEndDate) {
+ return sourceStartDate.after(targetEndDate) || targetStartDate.after(sourceEndDate);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationWorkflowBuilder.java
new file mode 100644
index 0000000..00fab99
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedReplicationWorkflowBuilder.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.feed;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
+import org.apache.hadoop.fs.Path;
+
+import java.util.Properties;
+
+/**
+ * Builds feed replication workflow, one per source-target cluster combination.
+ */
+/**
+ * Builds the Oozie orchestration workflow for feed replication; one workflow is
+ * produced per source-target cluster combination. For table (Hive) storage the
+ * workflow actions are additionally wired to hive-site configs and, in secure
+ * mode, to HCatalog credentials for both source and target metastores.
+ */
+public class FeedReplicationWorkflowBuilder extends OozieOrchestrationWorkflowBuilder<Feed> {
+    // Classpath resource holding the replication workflow template.
+    private static final String REPLICATION_WF_TEMPLATE = "/workflow/replication-workflow.xml";
+    // Credential names referenced by workflow actions in secure mode.
+    private static final String SOURCE_HIVE_CREDENTIAL_NAME = "falconSourceHiveAuth";
+    private static final String TARGET_HIVE_CREDENTIAL_NAME = "falconTargetHiveAuth";
+
+    public FeedReplicationWorkflowBuilder(Feed entity) {
+        super(entity, Tag.REPLICATION);
+    }
+
+    /**
+     * Materializes the replication workflow for the given target cluster under
+     * {@code buildPath} and returns the properties pointing at the marshalled app.
+     *
+     * @param cluster   target cluster the workflow is deployed to
+     * @param buildPath staging directory; NOTE(review): its last path segment is
+     *                  treated as the SOURCE cluster name — confirm the caller
+     *                  (bundle/coordinator builder) guarantees this layout
+     */
+    @Override public Properties build(Cluster cluster, Path buildPath) throws FalconException {
+        WORKFLOWAPP workflow = getWorkflow(REPLICATION_WF_TEMPLATE);
+        // Source cluster is looked up by the name encoded in the build path.
+        Cluster srcCluster = ConfigurationStore.get().get(EntityType.CLUSTER, buildPath.getName());
+        String wfName = EntityUtil.getWorkflowName(Tag.REPLICATION, entity).toString();
+        workflow.setName(wfName);
+
+        addLibExtensionsToWorkflow(cluster, workflow, Tag.REPLICATION);
+
+        addOozieRetries(workflow);
+
+        // Hive credential/config wiring is only needed for table-backed feeds.
+        if (isTableStorageType(cluster)) {
+            setupHiveCredentials(cluster, srcCluster, workflow);
+        }
+
+        marshal(cluster, workflow, buildPath);
+
+        return getProperties(buildPath, wfName);
+    }
+
+    /**
+     * Points the table-related workflow actions at the appropriate hive-site
+     * config and, in secure mode, attaches HCatalog credentials: the source
+     * credential for recordsize/export actions, the target credential for import.
+     */
+    private void setupHiveCredentials(Cluster targetCluster, Cluster sourceCluster, WORKFLOWAPP workflowApp) {
+        if (isSecurityEnabled) {
+            // add hcatalog credentials for secure mode and add a reference to each action
+            addHCatalogCredentials(workflowApp, sourceCluster, SOURCE_HIVE_CREDENTIAL_NAME);
+            addHCatalogCredentials(workflowApp, targetCluster, TARGET_HIVE_CREDENTIAL_NAME);
+        }
+
+        // hive-site.xml file is created later in coordinator initialization but
+        // actions are set to point to that here
+
+        for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
+            // The workflow element list mixes node types; only ACTION nodes matter here.
+            if (!(object instanceof org.apache.falcon.oozie.workflow.ACTION)) {
+                continue;
+            }
+
+            org.apache.falcon.oozie.workflow.ACTION action =
+                    (org.apache.falcon.oozie.workflow.ACTION) object;
+            String actionName = action.getName();
+            if ("recordsize".equals(actionName)) {
+                // add reference to hive-site conf to each action
+                action.getJava().setJobXml("${wf:appPath()}/conf/falcon-source-hive-site.xml");
+
+                if (isSecurityEnabled) { // add a reference to credential in the action
+                    action.setCred(SOURCE_HIVE_CREDENTIAL_NAME);
+                }
+            } else if ("table-export".equals(actionName)) {
+                if (isSecurityEnabled) { // add a reference to credential in the action
+                    action.setCred(SOURCE_HIVE_CREDENTIAL_NAME);
+                }
+            } else if ("table-import".equals(actionName)) {
+                if (isSecurityEnabled) { // add a reference to credential in the action
+                    action.setCred(TARGET_HIVE_CREDENTIAL_NAME);
+                }
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionCoordinatorBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionCoordinatorBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionCoordinatorBuilder.java
new file mode 100644
index 0000000..4393c94
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionCoordinatorBuilder.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.feed;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.v0.Frequency.TimeUnit;
+import org.apache.falcon.entity.v0.SchemaHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
+import org.apache.falcon.messaging.EntityInstanceMessage.EntityOps;
+import org.apache.falcon.oozie.OozieCoordinatorBuilder;
+import org.apache.falcon.oozie.OozieEntityBuilder;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.coordinator.ACTION;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.falcon.oozie.coordinator.WORKFLOW;
+import org.apache.hadoop.fs.Path;
+
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Builds feed retention coordinator.
+ */
+/**
+ * Builds the Oozie retention (eviction) coordinator for a feed on one cluster.
+ * The coordinator starts "now", ends at the feed's validity end, and runs the
+ * retention workflow every 6 hours for minute/hour feeds or daily otherwise.
+ */
+public class FeedRetentionCoordinatorBuilder extends OozieCoordinatorBuilder<Feed> {
+    public FeedRetentionCoordinatorBuilder(Feed entity) {
+        super(entity, Tag.RETENTION);
+    }
+
+    /**
+     * Builds the retention coordinator app under {@code buildPath}.
+     *
+     * @return a singleton list of properties for the marshalled coordinator, or
+     *         {@code null} when the feed's validity on this cluster has already
+     *         ended (retention is then not applicable)
+     */
+    @Override public List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException {
+        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(entity, cluster.getName());
+
+        // Nothing to retain once the feed's validity window is entirely in the past.
+        if (feedCluster.getValidity().getEnd().before(new Date())) {
+            LOG.warn("Feed Retention is not applicable as Feed's end time for cluster {} is not in the future",
+                cluster.getName());
+            return null;
+        }
+
+        COORDINATORAPP coord = new COORDINATORAPP();
+        String coordName = getEntityName();
+        coord.setName(coordName);
+        coord.setEnd(SchemaHelper.formatDateUTC(feedCluster.getValidity().getEnd()));
+        // Retention starts from "now", not from the feed's validity start.
+        coord.setStart(SchemaHelper.formatDateUTC(new Date()));
+        coord.setTimezone(entity.getTimezone().getID());
+        // Eviction cadence: 6-hourly for fine-grained feeds, daily for coarse ones.
+        TimeUnit timeUnit = entity.getFrequency().getTimeUnit();
+        if (timeUnit == TimeUnit.hours || timeUnit == TimeUnit.minutes) {
+            coord.setFrequency("${coord:hours(6)}");
+        } else {
+            coord.setFrequency("${coord:days(1)}");
+        }
+
+        Path coordPath = getBuildPath(buildPath);
+        Properties props = createCoordDefaultConfiguration(cluster, coordName);
+        props.put("timeZone", entity.getTimezone().getID());
+        props.put("frequency", entity.getFrequency().getTimeUnit().name());
+
+        final Storage storage = FeedHelper.createStorage(cluster, entity);
+        props.put("falconFeedStorageType", storage.getType().name());
+
+        // '$' EL markers are rewritten to '?' so the retention job can expand them itself.
+        String feedDataPath = storage.getUriTemplate();
+        props.put("feedDataPath",
+            feedDataPath.replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX));
+
+        props.put("limit", feedCluster.getRetention().getLimit().toString());
+
+        // JMS message arguments describing the eviction operation.
+        props.put(ARG.operation.getPropName(), EntityOps.DELETE.name());
+        props.put(ARG.feedNames.getPropName(), entity.getName());
+        props.put(ARG.feedInstancePaths.getPropName(), IGNORE);
+
+        props.put("falconInputFeeds", entity.getName());
+        props.put("falconInPaths", IGNORE);
+
+        props.putAll(FeedHelper.getUserWorkflowProperties("eviction"));
+
+        // Build the retention workflow app and point the coordinator action at it.
+        WORKFLOW workflow = new WORKFLOW();
+        Properties wfProp = OozieOrchestrationWorkflowBuilder.get(entity, Tag.RETENTION).build(cluster, coordPath);
+        workflow.setAppPath(getStoragePath(wfProp.getProperty(OozieEntityBuilder.ENTITY_PATH)));
+        workflow.setConfiguration(getConfig(props));
+        ACTION action = new ACTION();
+        action.setWorkflow(workflow);
+
+        coord.setAction(action);
+
+        marshal(cluster, coord, coordPath);
+
+        return Arrays.asList(getProperties(coordPath, coordName));
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionWorkflowBuilder.java
new file mode 100644
index 0000000..4a7f96b
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/feed/FeedRetentionWorkflowBuilder.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.feed;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
+import org.apache.hadoop.fs.Path;
+
+import java.util.Properties;
+
+/**
+ * Builds feed retention workflow.
+ */
+/**
+ * Builds the Oozie orchestration workflow that performs feed retention (eviction).
+ * For table-backed feeds the eviction action is wired to a generated hive-site.xml
+ * and, in secure mode, to an HCatalog credential.
+ */
+public class FeedRetentionWorkflowBuilder extends OozieOrchestrationWorkflowBuilder<Feed> {
+    // Classpath resource holding the retention workflow template.
+    private static final String RETENTION_WF_TEMPLATE = "/workflow/retention-workflow.xml";
+
+    public FeedRetentionWorkflowBuilder(Feed entity) {
+        // NOTE(review): constructed with Tag.DEFAULT while the workflow name below
+        // uses Tag.RETENTION — looks intentional (lifecycle vs. naming) but confirm.
+        super(entity, Tag.DEFAULT);
+    }
+
+    /**
+     * Materializes the retention workflow under {@code buildPath} for the given
+     * cluster and returns properties pointing at the marshalled app.
+     */
+    @Override public Properties build(Cluster cluster, Path buildPath) throws FalconException {
+        WORKFLOWAPP workflow = getWorkflow(RETENTION_WF_TEMPLATE);
+        String wfName = EntityUtil.getWorkflowName(Tag.RETENTION, entity).toString();
+        workflow.setName(wfName);
+        addLibExtensionsToWorkflow(cluster, workflow, Tag.RETENTION);
+        addOozieRetries(workflow);
+
+        // Hive config/credential wiring only applies to table-backed feeds.
+        if (isTableStorageType(cluster)) {
+            setupHiveCredentials(cluster, buildPath, workflow);
+        }
+
+        marshal(cluster, workflow, buildPath);
+        return getProperties(buildPath, wfName);
+    }
+
+    /**
+     * Creates hive-site.xml under the workflow path and points the "eviction"
+     * action at it; in secure mode also attaches the HCatalog credential
+     * (HIVE_CREDENTIAL_NAME is inherited from the superclass).
+     */
+    private void setupHiveCredentials(Cluster cluster, Path wfPath,
+        WORKFLOWAPP workflowApp) throws FalconException {
+        if (isSecurityEnabled) {
+            // add hcatalog credentials for secure mode and add a reference to each action
+            addHCatalogCredentials(workflowApp, cluster, HIVE_CREDENTIAL_NAME);
+        }
+
+        // create hive-site.xml file so actions can use it in the classpath
+        createHiveConfiguration(cluster, wfPath, ""); // no prefix since only one hive instance
+
+        for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
+            // The workflow element list mixes node types; only ACTION nodes matter here.
+            if (!(object instanceof org.apache.falcon.oozie.workflow.ACTION)) {
+                continue;
+            }
+
+            org.apache.falcon.oozie.workflow.ACTION action =
+                (org.apache.falcon.oozie.workflow.ACTION) object;
+            String actionName = action.getName();
+            if ("eviction".equals(actionName)) {
+                // add reference to hive-site conf to each action
+                action.getJava().setJobXml("${wf:appPath()}/conf/hive-site.xml");
+
+                if (isSecurityEnabled) {
+                    // add a reference to credential in the action
+                    action.setCred(HIVE_CREDENTIAL_NAME);
+                }
+            }
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/process/HiveProcessWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/process/HiveProcessWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/process/HiveProcessWorkflowBuilder.java
new file mode 100644
index 0000000..79a1883
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/process/HiveProcessWorkflowBuilder.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.oozie.hive.CONFIGURATION.Property;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.falcon.oozie.workflow.CONFIGURATION;
+import org.apache.falcon.util.OozieUtils;
+import org.apache.hadoop.fs.Path;
+
+import javax.xml.bind.JAXBElement;
+import java.util.List;
+
+/**
+ * Builds orchestration workflow for process where engine is hive.
+ */
+/**
+ * Builds the orchestration workflow for a process whose engine is Hive.
+ * Decorates the template's "user-hive-job" action with the user's script path,
+ * prepare-deletes for output paths, feed params, entity properties, the
+ * hive-site.xml job-xml reference and any custom jar archives.
+ */
+public class HiveProcessWorkflowBuilder extends ProcessExecutionWorkflowBuilder {
+    public HiveProcessWorkflowBuilder(Process entity) {
+        super(entity);
+    }
+
+    /**
+     * Applies Hive-specific decoration to the "user-hive-job" action only; all
+     * other actions are left untouched. The hive action payload is unmarshalled,
+     * mutated, and marshalled back into the generic workflow ACTION.
+     */
+    @Override protected void decorateAction(ACTION action, Cluster cluster, Path buildPath) throws FalconException {
+        if (!action.getName().equals("user-hive-job")) {
+            return;
+        }
+
+        JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(action);
+        org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
+
+        // The user's workflow path doubles as the Hive script location.
+        Path userWfPath = ProcessHelper.getUserWorkflowPath(entity, cluster, buildPath);
+        hiveAction.setScript(getStoragePath(userWfPath));
+
+        addPrepareDeleteOutputPath(hiveAction);
+
+        final List<String> paramList = hiveAction.getParam();
+        addInputFeedsAsParams(paramList, cluster);
+        addOutputFeedsAsParams(paramList, cluster);
+
+        propagateEntityProperties(hiveAction);
+
+        // adds hive-site.xml in hive classpath
+        hiveAction.setJobXml("${wf:appPath()}/conf/hive-site.xml");
+
+        addArchiveForCustomJars(cluster, hiveAction.getArchive(), ProcessHelper.getUserLibPath(entity, cluster,
+            buildPath));
+
+        OozieUtils.marshalHiveAction(action, actionJaxbElement);
+    }
+
+    /**
+     * Copies entity-level properties (collected by the superclass into a generic
+     * workflow CONFIGURATION) into the hive action's own configuration, converting
+     * between the two generated Property types.
+     */
+    private void propagateEntityProperties(org.apache.falcon.oozie.hive.ACTION hiveAction) {
+        CONFIGURATION conf = new CONFIGURATION();
+        super.propagateEntityProperties(conf, hiveAction.getParam());
+
+        List<Property> hiveConf = hiveAction.getConfiguration().getProperty();
+        for (CONFIGURATION.Property prop : conf.getProperty()) {
+            Property hiveProp = new Property();
+            hiveProp.setName(prop.getName());
+            hiveProp.setValue(prop.getValue());
+            hiveConf.add(hiveProp);
+        }
+    }
+
+    /**
+     * Adds a &lt;prepare&gt; element deleting each output path before the hive
+     * action runs; no-op when there are no output paths to delete.
+     */
+    private void addPrepareDeleteOutputPath(org.apache.falcon.oozie.hive.ACTION hiveAction) throws FalconException {
+
+        List<String> deleteOutputPathList = getPrepareDeleteOutputPathList();
+        if (deleteOutputPathList.isEmpty()) {
+            return;
+        }
+
+        org.apache.falcon.oozie.hive.PREPARE prepare = new org.apache.falcon.oozie.hive.PREPARE();
+        List<org.apache.falcon.oozie.hive.DELETE> deleteList = prepare.getDelete();
+
+        for (String deletePath : deleteOutputPathList) {
+            org.apache.falcon.oozie.hive.DELETE delete = new org.apache.falcon.oozie.hive.DELETE();
+            delete.setPath(deletePath);
+            deleteList.add(delete);
+        }
+
+        // deleteOutputPathList is non-empty here (early return above), so the loop
+        // always adds at least one entry; the former emptiness re-check was dead code.
+        hiveAction.setPrepare(prepare);
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilder.java
new file mode 100644
index 0000000..977d8c1
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilder.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Builds oozie workflow for process where the engine is oozie.
+ */
+public class OozieProcessWorkflowBuilder extends ProcessExecutionWorkflowBuilder {
+ public OozieProcessWorkflowBuilder(Process entity) {
+ super(entity);
+ }
+
+ @Override protected void decorateAction(ACTION action, Cluster cluster, Path buildPath) throws FalconException {
+ if (!action.getName().equals("user-oozie-workflow")) {
+ return;
+ }
+ action.getSubWorkflow().setAppPath(getStoragePath(ProcessHelper.getUserWorkflowPath(entity, cluster,
+ buildPath)));
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/process/PigProcessWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/process/PigProcessWorkflowBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/process/PigProcessWorkflowBuilder.java
new file mode 100644
index 0000000..29f601d
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/process/PigProcessWorkflowBuilder.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.falcon.oozie.workflow.DELETE;
+import org.apache.falcon.oozie.workflow.PIG;
+import org.apache.falcon.oozie.workflow.PREPARE;
+import org.apache.hadoop.fs.Path;
+
+import java.util.List;
+
+/**
+ * Builds orchestration workflow for process where engine is pig.
+ */
+/**
+ * Builds the orchestration workflow for a process whose engine is Pig.
+ * Decorates the template's "user-pig-job" action with the user's script path,
+ * prepare-deletes for output paths, feed params, entity properties, hive-site
+ * (for table storage) and any custom jar archives.
+ */
+public class PigProcessWorkflowBuilder extends ProcessExecutionWorkflowBuilder {
+
+    public PigProcessWorkflowBuilder(Process entity) {
+        super(entity);
+    }
+
+    /**
+     * Applies Pig-specific decoration to the "user-pig-job" action only; all
+     * other actions are left untouched.
+     */
+    @Override protected void decorateAction(ACTION action, Cluster cluster, Path buildPath) throws FalconException {
+        if (!action.getName().equals("user-pig-job")) {
+            return;
+        }
+
+        PIG pigAction = action.getPig();
+        // The user's workflow path doubles as the Pig script location.
+        Path userWfPath = ProcessHelper.getUserWorkflowPath(entity, cluster, buildPath);
+        pigAction.setScript(getStoragePath(userWfPath));
+
+        addPrepareDeleteOutputPath(pigAction);
+
+        final List<String> paramList = pigAction.getParam();
+        addInputFeedsAsParams(paramList, cluster);
+        addOutputFeedsAsParams(paramList, cluster);
+
+        propagateEntityProperties(pigAction.getConfiguration(), pigAction.getParam());
+
+        if (isTableStorageType(cluster)) { // adds hive-site.xml in pig classpath
+            pigAction.getFile().add("${wf:appPath()}/conf/hive-site.xml");
+        }
+
+        addArchiveForCustomJars(cluster, pigAction.getArchive(), ProcessHelper.getUserLibPath(entity, cluster,
+            buildPath));
+    }
+
+    /**
+     * Adds a &lt;prepare&gt; element deleting each output path before the pig
+     * action runs; no-op when there are no output paths to delete.
+     */
+    private void addPrepareDeleteOutputPath(PIG pigAction) throws FalconException {
+        List<String> deleteOutputPathList = getPrepareDeleteOutputPathList();
+        if (deleteOutputPathList.isEmpty()) {
+            return;
+        }
+
+        final PREPARE prepare = new PREPARE();
+        final List<DELETE> deleteList = prepare.getDelete();
+
+        for (String deletePath : deleteOutputPathList) {
+            final DELETE delete = new DELETE();
+            delete.setPath(deletePath);
+            deleteList.add(delete);
+        }
+
+        // deleteOutputPathList is non-empty here (early return above), so the loop
+        // always adds at least one entry; the former emptiness re-check was dead code.
+        pigAction.setPrepare(prepare);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessBundleBuilder.java
----------------------------------------------------------------------
diff --git a/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessBundleBuilder.java b/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessBundleBuilder.java
new file mode 100644
index 0000000..86cea93
--- /dev/null
+++ b/oozie/src/main/java/org/apache/falcon/oozie/process/ProcessBundleBuilder.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.Frequency.TimeUnit;
+import org.apache.falcon.entity.v0.SchemaHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.entity.v0.process.Input;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.falcon.oozie.OozieBundleBuilder;
+import org.apache.falcon.oozie.OozieCoordinatorBuilder;
+import org.apache.falcon.update.UpdateHelper;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.oozie.client.CoordinatorJob.Timeunit;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * Builds oozie bundle for process - schedulable entity in oozie.
+ */
+/**
+ * Builds the Oozie bundle for a process (the schedulable entity in Oozie).
+ * Responsibilities: expose optional-input dataset properties, copy the user's
+ * workflow and libs into the staging directory (recording checksums for update
+ * detection), and delegate coordinator construction.
+ */
+public class ProcessBundleBuilder extends OozieBundleBuilder<Process> {
+
+    public ProcessBundleBuilder(Process entity) {
+        super(entity);
+    }
+
+    /**
+     * Returns dataset-description properties for every optional input of the
+     * process; empty when the process has no inputs or none are optional.
+     */
+    @Override protected Properties getAdditionalProperties(Cluster cluster) throws FalconException {
+        Properties properties = new Properties();
+        if (entity.getInputs() != null) {
+            for (Input in : entity.getInputs().getInputs()) {
+                if (in.isOptional()) {
+                    properties.putAll(getOptionalInputProperties(in, cluster.getName()));
+                }
+            }
+        }
+        return properties;
+    }
+
+    /**
+     * Builds the per-input property set (frequency, timezone, initial instance,
+     * URI template, start/end instance) that Oozie needs to resolve an optional
+     * input's dataset at runtime.
+     */
+    private Properties getOptionalInputProperties(Input in, String clusterName) throws FalconException {
+        Properties properties = new Properties();
+        Feed feed = EntityUtil.getEntity(EntityType.FEED, in.getFeed());
+        org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, clusterName);
+        String inName = in.getName();
+        properties.put(inName + ".frequency", String.valueOf(feed.getFrequency().getFrequency()));
+        properties.put(inName + ".freq_timeunit", mapToCoordTimeUnit(feed.getFrequency().getTimeUnit()).name());
+        properties.put(inName + ".timezone", feed.getTimezone().getID());
+        properties.put(inName + ".end_of_duration", Timeunit.NONE.name());
+        properties.put(inName + ".initial-instance", SchemaHelper.formatDateUTC(cluster.getValidity().getStart()));
+        // Optional inputs don't gate on availability flags, hence a dummy done-flag.
+        properties.put(inName + ".done-flag", "notused");
+
+        // '$' EL markers are encoded as '%' for the coordinator's uri-template syntax.
+        String locPath = FeedHelper.createStorage(clusterName, feed)
+            .getUriTemplate(LocationType.DATA).replace('$', '%');
+        properties.put(inName + ".uri-template", locPath);
+
+        properties.put(inName + ".start-instance", in.getStart());
+        properties.put(inName + ".end-instance", in.getEnd());
+        return properties;
+    }
+
+    /**
+     * Maps a Falcon frequency time unit onto the corresponding Oozie coordinator
+     * Timeunit; ends (end_of_day etc.) are not supported here.
+     */
+    private Timeunit mapToCoordTimeUnit(TimeUnit tu) {
+        switch (tu) {
+        case days:
+            return Timeunit.DAY;
+
+        case hours:
+            return Timeunit.HOUR;
+
+        case minutes:
+            return Timeunit.MINUTE;
+
+        case months:
+            return Timeunit.MONTH;
+
+        default:
+            throw new IllegalArgumentException("Unhandled time unit " + tu);
+        }
+    }
+
+    @Override protected Path getLibPath(Cluster cluster, Path buildPath) throws FalconException {
+        return ProcessHelper.getUserLibPath(entity, cluster, buildPath);
+    }
+
+    /**
+     * Stages the user workflow/libs, then builds the process coordinators.
+     */
+    @Override protected List<Properties> doBuild(Cluster cluster, Path buildPath) throws FalconException {
+        copyUserWorkflow(cluster, buildPath);
+
+        return OozieCoordinatorBuilder.get(entity, Tag.DEFAULT).buildCoords(cluster, buildPath);
+    }
+
+    /**
+     * Copies the user's workflow (and lib directory, when present) into the
+     * staging dir and records checksums so later updates can detect changes.
+     */
+    private void copyUserWorkflow(Cluster cluster, Path buildPath) throws FalconException {
+        try {
+            FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
+
+            //Copy user workflow and lib to staging dir
+            Map<String, String> checksums = UpdateHelper.checksumAndCopy(fs, new Path(entity.getWorkflow().getPath()),
+                new Path(buildPath, EntityUtil.PROCESS_USER_DIR));
+            if (entity.getWorkflow().getLib() != null && fs.exists(new Path(entity.getWorkflow().getLib()))) {
+                checksums.putAll(UpdateHelper.checksumAndCopy(fs, new Path(entity.getWorkflow().getLib()),
+                    new Path(buildPath, EntityUtil.PROCESS_USERLIB_DIR)));
+            }
+
+            writeChecksums(fs, new Path(buildPath, EntityUtil.PROCESS_CHECKSUM_FILE), checksums);
+        } catch (IOException e) {
+            throw new FalconException("Failed to copy user workflow/lib", e);
+        }
+    }
+
+    /**
+     * Writes the checksum map to {@code path} as one "key=value" line per entry.
+     * NOTE(review): String.getBytes() uses the platform default charset — if the
+     * checksum file must be portable, confirm and switch to an explicit charset.
+     */
+    private void writeChecksums(FileSystem fs, Path path, Map<String, String> checksums) throws FalconException {
+        try {
+            FSDataOutputStream stream = fs.create(path);
+            try {
+                for (Map.Entry<String, String> entry : checksums.entrySet()) {
+                    stream.write((entry.getKey() + "=" + entry.getValue() + "\n").getBytes());
+                }
+            } finally {
+                stream.close();
+            }
+        } catch (IOException e) {
+            // Fixed: message was copy-pasted from copyUserWorkflow and misreported
+            // a checksum-write failure as a copy failure.
+            throw new FalconException("Failed to write checksums to " + path, e);
+        }
+    }
+}
[9/9] git commit: FALCON-369 Refactor workflow builder. Contributed
by Shwetha GS
Posted by sh...@apache.org.
FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Project: http://git-wip-us.apache.org/repos/asf/incubator-falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-falcon/commit/185b5888
Tree: http://git-wip-us.apache.org/repos/asf/incubator-falcon/tree/185b5888
Diff: http://git-wip-us.apache.org/repos/asf/incubator-falcon/diff/185b5888
Branch: refs/heads/master
Commit: 185b58885df921a0179ae2aff8e32b4ec591fa15
Parents: 57953f7
Author: Shwetha GS <sh...@inmobi.com>
Authored: Thu Jul 10 12:27:13 2014 +0530
Committer: Shwetha GS <sh...@inmobi.com>
Committed: Thu Jul 10 12:27:13 2014 +0530
----------------------------------------------------------------------
CHANGES.txt | 2 +
.../apache/falcon/entity/CatalogStorage.java | 11 +-
.../org/apache/falcon/entity/ClusterHelper.java | 2 +-
.../org/apache/falcon/entity/EntityUtil.java | 47 +-
.../org/apache/falcon/entity/FeedHelper.java | 18 +-
.../org/apache/falcon/entity/ProcessHelper.java | 39 +
.../apache/falcon/workflow/WorkflowBuilder.java | 51 --
.../falcon/entity/CatalogStorageTest.java | 4 +-
feed/pom.xml | 105 ---
.../workflow/OozieFeedWorkflowBuilder.java | 728 ---------------
.../coordinator/replication-coordinator.xml | 51 --
.../config/workflow/falcon-table-export.hql | 18 -
.../config/workflow/falcon-table-import.hql | 20 -
.../config/workflow/replication-workflow.xml | 330 -------
.../config/workflow/retention-workflow.xml | 208 -----
.../converter/OozieFeedWorkflowBuilderTest.java | 669 --------------
feed/src/test/resources/feed.xml | 56 --
feed/src/test/resources/fs-replication-feed.xml | 68 --
feed/src/test/resources/src-cluster.xml | 40 -
.../test/resources/table-replication-feed.xml | 42 -
feed/src/test/resources/trg-cluster-alpha.xml | 39 -
feed/src/test/resources/trg-cluster-beta.xml | 39 -
feed/src/test/resources/trg-cluster.xml | 40 -
.../apache/falcon/oozie/OozieBundleBuilder.java | 143 +++
.../falcon/oozie/OozieCoordinatorBuilder.java | 181 ++++
.../apache/falcon/oozie/OozieEntityBuilder.java | 306 +++++++
.../OozieOrchestrationWorkflowBuilder.java | 302 +++++++
.../falcon/oozie/feed/FeedBundleBuilder.java | 65 ++
.../feed/FeedReplicationCoordinatorBuilder.java | 418 +++++++++
.../feed/FeedReplicationWorkflowBuilder.java | 101 +++
.../feed/FeedRetentionCoordinatorBuilder.java | 110 +++
.../feed/FeedRetentionWorkflowBuilder.java | 87 ++
.../process/HiveProcessWorkflowBuilder.java | 103 +++
.../process/OozieProcessWorkflowBuilder.java | 43 +
.../process/PigProcessWorkflowBuilder.java | 87 ++
.../oozie/process/ProcessBundleBuilder.java | 152 ++++
.../ProcessExecutionCoordinatorBuilder.java | 336 +++++++
.../ProcessExecutionWorkflowBuilder.java | 233 +++++
.../falcon/workflow/OozieWorkflowBuilder.java | 636 -------------
.../workflow/engine/OozieWorkflowEngine.java | 95 +-
.../coordinator/replication-coordinator.xml | 51 ++
.../resources/workflow/falcon-table-export.hql | 18 +
.../resources/workflow/falcon-table-import.hql | 20 +
.../workflow/process-parent-workflow.xml | 278 ++++++
.../resources/workflow/replication-workflow.xml | 330 +++++++
.../resources/workflow/retention-workflow.xml | 208 +++++
.../feed/OozieFeedWorkflowBuilderTest.java | 673 ++++++++++++++
.../falcon/oozie/process/AbstractTestBase.java | 141 +++
.../OozieProcessWorkflowBuilderTest.java | 767 ++++++++++++++++
.../resources/config/cluster/cluster-0.1.xml | 44 +
.../src/test/resources/config/feed/feed-0.1.xml | 63 ++
.../config/feed/hive-table-feed-out.xml | 43 +
.../resources/config/feed/hive-table-feed.xml | 43 +
.../test/resources/config/late/late-cluster.xml | 43 +
.../test/resources/config/late/late-feed1.xml | 53 ++
.../test/resources/config/late/late-feed2.xml | 53 ++
.../test/resources/config/late/late-feed3.xml | 53 ++
.../resources/config/late/late-process1.xml | 41 +
.../resources/config/late/late-process2.xml | 57 ++
.../config/process/dumb-hive-process.xml | 39 +
.../resources/config/process/dumb-process.xml | 40 +
.../config/process/hive-process-FSInputFeed.xml | 46 +
.../process/hive-process-FSOutputFeed.xml | 46 +
.../resources/config/process/hive-process.xml | 46 +
.../config/process/pig-process-0.1.xml | 53 ++
.../config/process/pig-process-table.xml | 46 +
.../resources/config/process/process-0.1.xml | 45 +
oozie/src/test/resources/feed/feed.xml | 56 ++
.../test/resources/feed/fs-replication-feed.xml | 68 ++
oozie/src/test/resources/feed/src-cluster.xml | 40 +
.../resources/feed/table-replication-feed.xml | 42 +
.../test/resources/feed/trg-cluster-alpha.xml | 39 +
.../test/resources/feed/trg-cluster-beta.xml | 39 +
oozie/src/test/resources/feed/trg-cluster.xml | 40 +
pom.xml | 2 -
.../org/apache/falcon/util/EmbeddedServer.java | 1 +
process/pom.xml | 118 ---
.../workflow/OozieProcessWorkflowBuilder.java | 904 -------------------
.../config/workflow/process-parent-workflow.xml | 278 ------
.../falcon/converter/AbstractTestBase.java | 83 --
.../OozieProcessWorkflowBuilderTest.java | 799 ----------------
.../resources/config/cluster/cluster-0.1.xml | 44 -
.../src/test/resources/config/feed/feed-0.1.xml | 63 --
.../config/feed/hive-table-feed-out.xml | 43 -
.../resources/config/feed/hive-table-feed.xml | 43 -
.../test/resources/config/late/late-cluster.xml | 43 -
.../test/resources/config/late/late-feed1.xml | 53 --
.../test/resources/config/late/late-feed2.xml | 53 --
.../test/resources/config/late/late-feed3.xml | 53 --
.../resources/config/late/late-process1.xml | 41 -
.../resources/config/late/late-process2.xml | 57 --
.../config/process/dumb-hive-process.xml | 39 -
.../resources/config/process/dumb-process.xml | 40 -
.../config/process/hive-process-FSInputFeed.xml | 46 -
.../process/hive-process-FSOutputFeed.xml | 46 -
.../resources/config/process/hive-process.xml | 46 -
.../config/process/pig-process-0.1.xml | 53 --
.../config/process/pig-process-table.xml | 46 -
.../resources/config/process/process-0.1.xml | 45 -
src/main/examples/app/hive/wordcount.hql | 2 +-
src/main/examples/app/pig/hcat-wordcount.pig | 6 +-
src/main/examples/data/hcat-generate.sh | 2 +
.../examples/entity/filesystem/pig-process.xml | 4 +-
.../entity/filesystem/replication-feed.xml | 46 +
.../entity/filesystem/standalone-cluster.xml | 2 +-
.../filesystem/standalone-target-cluster.xml | 43 +
src/main/examples/entity/hcat/hcat-in-feed.xml | 4 +-
src/main/examples/entity/hcat/hcat-out-feed.xml | 4 +-
.../examples/entity/hcat/hcat-pig-process.xml | 10 +-
.../entity/hcat/hcat-replication-feed.xml | 42 +
.../entity/hcat/hcat-standalone-cluster.xml | 8 +-
.../hcat/hcat-standalone-target-cluster.xml | 45 +
src/main/examples/entity/hcat/hive-process.xml | 10 +-
webapp/pom.xml | 1 +
114 files changed, 6686 insertions(+), 6271 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 4fd1cae..35eaf7e 100755
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -8,6 +8,8 @@ Trunk (Unreleased)
FALCON-263 API to get workflow parameters. (pavan kumar kolamuri via Shwetha GS)
IMPROVEMENTS
+ FALCON-369 Refactor workflow builder. (Shwetha GS)
+
FALCON-280 Validate the ACL in Feed entity with the user submitting the entity
(Jean-Baptiste Onofré via Venkatesh Seetharam)
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/main/java/org/apache/falcon/entity/CatalogStorage.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/entity/CatalogStorage.java b/common/src/main/java/org/apache/falcon/entity/CatalogStorage.java
index f7b592d..89e5b3e 100644
--- a/common/src/main/java/org/apache/falcon/entity/CatalogStorage.java
+++ b/common/src/main/java/org/apache/falcon/entity/CatalogStorage.java
@@ -28,7 +28,9 @@ import org.apache.falcon.entity.v0.feed.LocationType;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.ArrayList;
import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
@@ -229,19 +231,18 @@ public class CatalogStorage implements Storage {
return partitions.containsKey(key);
}
- public String getDatedPartitionKey() {
- String datedPartitionKey = null;
+ public List<String> getDatedPartitionKeys() {
+ List<String> keys = new ArrayList<String>();
for (Map.Entry<String, String> entry : getPartitions().entrySet()) {
Matcher matcher = FeedDataPath.PATTERN.matcher(entry.getValue());
if (matcher.find()) {
- datedPartitionKey = entry.getKey();
- break;
+ keys.add(entry.getKey());
}
}
- return datedPartitionKey;
+ return keys;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/main/java/org/apache/falcon/entity/ClusterHelper.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/entity/ClusterHelper.java b/common/src/main/java/org/apache/falcon/entity/ClusterHelper.java
index 5284d68..cb3ea08 100644
--- a/common/src/main/java/org/apache/falcon/entity/ClusterHelper.java
+++ b/common/src/main/java/org/apache/falcon/entity/ClusterHelper.java
@@ -140,7 +140,7 @@ public final class ClusterHelper {
return null;
}
- public static Map<String, String> geHiveProperties(Cluster cluster) {
+ public static Map<String, String> getHiveProperties(Cluster cluster) {
if (cluster.getProperties() != null) {
List<Property> properties = cluster.getProperties().getProperties();
if (properties != null && !properties.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/main/java/org/apache/falcon/entity/EntityUtil.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/entity/EntityUtil.java b/common/src/main/java/org/apache/falcon/entity/EntityUtil.java
index b4bc07d..a38e553 100644
--- a/common/src/main/java/org/apache/falcon/entity/EntityUtil.java
+++ b/common/src/main/java/org/apache/falcon/entity/EntityUtil.java
@@ -29,7 +29,7 @@ import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.SchemaHelper;
-import org.apache.falcon.entity.v0.feed.Cluster;
+import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.process.*;
@@ -435,6 +435,20 @@ public final class EntityUtil {
return builder.getWorkflowTag(workflowName);
}
+ public static List<String> getWorkflowNames(Entity entity, String cluster) {
+ switch(entity.getEntityType()) {
+ case FEED:
+ return Arrays.asList(getWorkflowName(Tag.RETENTION, entity).toString(),
+ getWorkflowName(Tag.REPLICATION, entity).toString());
+
+ case PROCESS:
+ return Arrays.asList(getWorkflowName(Tag.DEFAULT, entity).toString());
+
+ default:
+ }
+ throw new IllegalArgumentException("Unhandled type: " + entity.getEntityType());
+ }
+
public static <T extends Entity> T getClusterView(T entity, String clusterName) {
switch (entity.getEntityType()) {
case CLUSTER:
@@ -442,10 +456,10 @@ public final class EntityUtil {
case FEED:
Feed feed = (Feed) entity.copy();
- Cluster feedCluster = FeedHelper.getCluster(feed, clusterName);
- Iterator<Cluster> itr = feed.getClusters().getClusters().iterator();
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, clusterName);
+ Iterator<org.apache.falcon.entity.v0.feed.Cluster> itr = feed.getClusters().getClusters().iterator();
while (itr.hasNext()) {
- Cluster cluster = itr.next();
+ org.apache.falcon.entity.v0.feed.Cluster cluster = itr.next();
//In addition to retaining the required clster, retain the sources clusters if this is the target
// cluster
//1. Retain cluster if cluster n
@@ -482,7 +496,7 @@ public final class EntityUtil {
case FEED:
Feed feed = (Feed) entity;
- for (Cluster cluster : feed.getClusters().getClusters()) {
+ for (org.apache.falcon.entity.v0.feed.Cluster cluster : feed.getClusters().getClusters()) {
clusters.add(cluster.getName());
}
break;
@@ -642,4 +656,27 @@ public final class EntityUtil {
return DeploymentUtil.isEmbeddedMode() || (!DeploymentUtil.isPrism()
&& colo.equals(DeploymentUtil.getCurrentColo()));
}
+
+ public static Date getNextStartTime(Entity entity, Cluster cluster, Date effectiveTime) {
+ switch(entity.getEntityType()) {
+ case FEED:
+ Feed feed = (Feed) entity;
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
+ return getNextStartTime(feedCluster.getValidity().getStart(), feed.getFrequency(), feed.getTimezone(),
+ effectiveTime);
+
+ case PROCESS:
+ Process process = (Process) entity;
+ org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(process,
+ cluster.getName());
+ return getNextStartTime(processCluster.getValidity().getStart(), process.getFrequency(),
+ process.getTimezone(), effectiveTime);
+
+ default:
+ }
+
+ throw new IllegalArgumentException("Unhandled type: " + entity.getEntityType());
+ }
+
+
}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/main/java/org/apache/falcon/entity/FeedHelper.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/entity/FeedHelper.java b/common/src/main/java/org/apache/falcon/entity/FeedHelper.java
index 44d8d01..8c61ac2 100644
--- a/common/src/main/java/org/apache/falcon/entity/FeedHelper.java
+++ b/common/src/main/java/org/apache/falcon/entity/FeedHelper.java
@@ -29,6 +29,7 @@ import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.Location;
import org.apache.falcon.entity.v0.feed.Locations;
import org.apache.falcon.expression.ExpressionHelper;
+import org.apache.falcon.util.BuildProperties;
import java.net.URISyntaxException;
import java.util.Arrays;
@@ -252,7 +253,7 @@ public final class FeedHelper {
Feed feed, CatalogStorage storage, Tag tag, String suffix) {
String stagingDirPath = getStagingDir(clusterEntity, feed, storage, tag);
- String datedPartitionKey = storage.getDatedPartitionKey();
+ String datedPartitionKey = storage.getDatedPartitionKeys().get(0);
String datedPartitionKeySuffix = datedPartitionKey + "=${coord:dataOutPartitionValue('output',"
+ "'" + datedPartitionKey + "')}";
return stagingDirPath + "/"
@@ -273,4 +274,19 @@ public final class FeedHelper {
+ storage.getDatabase() + "/"
+ storage.getTable();
}
+
+ public static Properties getUserWorkflowProperties(String policy) {
+ Properties props = new Properties();
+ props.put("userWorkflowName", policy + "-policy");
+ props.put("userWorkflowEngine", "falcon");
+
+ String version;
+ try {
+ version = BuildProperties.get().getProperty("build.version");
+ } catch (Exception e) { // unfortunate that this is only available in prism/webapp
+ version = "0.5";
+ }
+ props.put("userWorkflowVersion", version);
+ return props;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/main/java/org/apache/falcon/entity/ProcessHelper.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/entity/ProcessHelper.java b/common/src/main/java/org/apache/falcon/entity/ProcessHelper.java
index ece8982..44dac3c 100644
--- a/common/src/main/java/org/apache/falcon/entity/ProcessHelper.java
+++ b/common/src/main/java/org/apache/falcon/entity/ProcessHelper.java
@@ -26,6 +26,11 @@ import org.apache.falcon.entity.v0.process.Cluster;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.hadoop.HadoopClientFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.IOException;
/**
* Helper methods for accessing process members.
@@ -75,4 +80,38 @@ public final class ProcessHelper {
return storageType;
}
+
+ public static Path getUserWorkflowPath(Process process, org.apache.falcon.entity.v0.cluster.Cluster cluster,
+ Path buildPath) throws FalconException {
+ try {
+ FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
+ Path wfPath = new Path(process.getWorkflow().getPath());
+ if (fs.isFile(wfPath)) {
+ return new Path(buildPath.getParent(), EntityUtil.PROCESS_USER_DIR + "/" + wfPath.getName());
+ } else {
+ return new Path(buildPath.getParent(), EntityUtil.PROCESS_USER_DIR);
+ }
+ } catch(IOException e) {
+ throw new FalconException("Failed to get workflow path", e);
+ }
+ }
+
+ public static Path getUserLibPath(Process process, org.apache.falcon.entity.v0.cluster.Cluster cluster,
+ Path buildPath) throws FalconException {
+ try {
+ if (process.getWorkflow().getLib() == null) {
+ return null;
+ }
+ Path libPath = new Path(process.getWorkflow().getLib());
+
+ FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
+ if (fs.isFile(libPath)) {
+ return new Path(buildPath.getParent(), EntityUtil.PROCESS_USERLIB_DIR + "/" + libPath.getName());
+ } else {
+ return new Path(buildPath.getParent(), EntityUtil.PROCESS_USERLIB_DIR);
+ }
+ } catch(IOException e) {
+ throw new FalconException("Failed to get user lib path", e);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/main/java/org/apache/falcon/workflow/WorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/workflow/WorkflowBuilder.java b/common/src/main/java/org/apache/falcon/workflow/WorkflowBuilder.java
deleted file mode 100644
index 1f9a8c8..0000000
--- a/common/src/main/java/org/apache/falcon/workflow/WorkflowBuilder.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.falcon.workflow;
-
-import org.apache.falcon.FalconException;
-import org.apache.falcon.entity.v0.Entity;
-import org.apache.falcon.util.ReflectionUtils;
-
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * Builder for building workflow definition for the underlying scheduler.
- * @param <T>
- */
-public abstract class WorkflowBuilder<T extends Entity> {
- protected T entity;
-
- protected WorkflowBuilder(T entity) {
- this.entity = entity;
- }
-
- public T getEntity() {
- return entity;
- }
-
- public static WorkflowBuilder<Entity> getBuilder(String engine, Entity entity) throws FalconException {
- String classKey = engine + "." + entity.getEntityType().name().toLowerCase() + ".workflow.builder";
- return ReflectionUtils.getInstance(classKey, entity.getEntityType().getEntityClass(), entity);
- }
-
- public abstract Map<String, Properties> newWorkflowSchedule(String... clusters) throws FalconException;
-
- public abstract String[] getWorkflowNames();
-}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/common/src/test/java/org/apache/falcon/entity/CatalogStorageTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/falcon/entity/CatalogStorageTest.java b/common/src/test/java/org/apache/falcon/entity/CatalogStorageTest.java
index 972066d..5d06431 100644
--- a/common/src/test/java/org/apache/falcon/entity/CatalogStorageTest.java
+++ b/common/src/test/java/org/apache/falcon/entity/CatalogStorageTest.java
@@ -51,7 +51,7 @@ public class CatalogStorageTest {
Assert.assertTrue(storage.hasPartition("region"));
Assert.assertNull(storage.getPartitionValue("unknown"));
Assert.assertFalse(storage.hasPartition("unknown"));
- Assert.assertEquals(storage.getDatedPartitionKey(), "ds");
+ Assert.assertEquals(storage.getDatedPartitionKeys().get(0), "ds");
}
@Test
@@ -67,7 +67,7 @@ public class CatalogStorageTest {
Assert.assertTrue(storage.hasPartition("region"));
Assert.assertNull(storage.getPartitionValue("unknown"));
Assert.assertFalse(storage.hasPartition("unknown"));
- Assert.assertEquals(storage.getDatedPartitionKey(), "ds");
+ Assert.assertEquals(storage.getDatedPartitionKeys().get(0), "ds");
}
@Test
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/pom.xml
----------------------------------------------------------------------
diff --git a/feed/pom.xml b/feed/pom.xml
deleted file mode 100644
index ab82b77..0000000
--- a/feed/pom.xml
+++ /dev/null
@@ -1,105 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-main</artifactId>
- <version>0.6-incubating-SNAPSHOT</version>
- </parent>
- <artifactId>falcon-feed</artifactId>
- <description>Apache Falcon Feed Module</description>
- <name>Apache Falcon Feed</name>
- <packaging>jar</packaging>
-
- <profiles>
- <profile>
- <id>hadoop-1</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>hadoop-2</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- <classifier>tests</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <classifier>tests</classifier>
- </dependency>
- </dependencies>
- </profile>
- </profiles>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-common</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-oozie-adaptor</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-messaging</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.falcon</groupId>
- <artifactId>falcon-test-util</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.testng</groupId>
- <artifactId>testng</artifactId>
- </dependency>
- </dependencies>
-
-</project>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/main/java/org/apache/falcon/workflow/OozieFeedWorkflowBuilder.java
----------------------------------------------------------------------
diff --git a/feed/src/main/java/org/apache/falcon/workflow/OozieFeedWorkflowBuilder.java b/feed/src/main/java/org/apache/falcon/workflow/OozieFeedWorkflowBuilder.java
deleted file mode 100644
index 4e300bf..0000000
--- a/feed/src/main/java/org/apache/falcon/workflow/OozieFeedWorkflowBuilder.java
+++ /dev/null
@@ -1,728 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.falcon.workflow;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.falcon.FalconException;
-import org.apache.falcon.Tag;
-import org.apache.falcon.entity.CatalogStorage;
-import org.apache.falcon.entity.ClusterHelper;
-import org.apache.falcon.entity.EntityUtil;
-import org.apache.falcon.entity.FeedHelper;
-import org.apache.falcon.entity.Storage;
-import org.apache.falcon.entity.store.ConfigurationStore;
-import org.apache.falcon.entity.v0.EntityType;
-import org.apache.falcon.entity.v0.Frequency;
-import org.apache.falcon.entity.v0.Frequency.TimeUnit;
-import org.apache.falcon.entity.v0.SchemaHelper;
-import org.apache.falcon.entity.v0.cluster.Cluster;
-import org.apache.falcon.entity.v0.feed.ClusterType;
-import org.apache.falcon.entity.v0.feed.Feed;
-import org.apache.falcon.entity.v0.feed.LocationType;
-import org.apache.falcon.entity.v0.feed.Property;
-import org.apache.falcon.expression.ExpressionHelper;
-import org.apache.falcon.hadoop.HadoopClientFactory;
-import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
-import org.apache.falcon.messaging.EntityInstanceMessage.EntityOps;
-import org.apache.falcon.oozie.coordinator.ACTION;
-import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
-import org.apache.falcon.oozie.coordinator.SYNCDATASET;
-import org.apache.falcon.oozie.coordinator.WORKFLOW;
-import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
-import org.apache.falcon.security.CurrentUser;
-import org.apache.falcon.util.BuildProperties;
-import org.apache.falcon.util.RuntimeProperties;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * Workflow definition builder for feed replication & retention.
- */
-public class OozieFeedWorkflowBuilder extends OozieWorkflowBuilder<Feed> {
- private static final Logger LOG = LoggerFactory.getLogger(OozieFeedWorkflowBuilder.class);
-
- public OozieFeedWorkflowBuilder(Feed entity) {
- super(entity);
- }
-
- @Override
- public Map<String, Properties> newWorkflowSchedule(String... clusters) throws FalconException {
- Map<String, Properties> propertiesMap = new HashMap<String, Properties>();
-
- for (String clusterName : clusters) {
- org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(entity, clusterName);
- if (!feedCluster.getValidity().getStart().before(feedCluster.getValidity().getEnd())) {
- LOG.info("feed validity start <= end for cluster {}. Skipping schedule", clusterName);
- break;
- }
-
- Cluster cluster = CONFIG_STORE.get(EntityType.CLUSTER, feedCluster.getName());
- Path bundlePath = EntityUtil.getNewStagingPath(cluster, entity);
-
- if (!map(cluster, bundlePath)) {
- break;
- }
- propertiesMap.put(clusterName, createAppProperties(clusterName, bundlePath, CurrentUser.getUser()));
- }
- return propertiesMap;
- }
-
- @Override
- public Date getNextStartTime(Feed feed, String cluster, Date now) throws FalconException {
- org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster);
- return EntityUtil.getNextStartTime(feedCluster.getValidity().getStart(),
- feed.getFrequency(), feed.getTimezone(), now);
- }
-
- @Override
- public String[] getWorkflowNames() {
- return new String[]{
- EntityUtil.getWorkflowName(Tag.RETENTION, entity).toString(),
- EntityUtil.getWorkflowName(Tag.REPLICATION, entity).toString(), };
- }
-
- private final RetentionOozieWorkflowMapper retentionMapper = new RetentionOozieWorkflowMapper();
- private final ReplicationOozieWorkflowMapper replicationMapper = new ReplicationOozieWorkflowMapper();
-
- @Override
- public List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException {
- List<COORDINATORAPP> coords = new ArrayList<COORDINATORAPP>();
- COORDINATORAPP retentionCoord = getRetentionCoordinator(cluster, bundlePath);
- if (retentionCoord != null) {
- coords.add(retentionCoord);
- }
- List<COORDINATORAPP> replicationCoords = getReplicationCoordinators(cluster, bundlePath);
- coords.addAll(replicationCoords);
- return coords;
- }
-
- private COORDINATORAPP getRetentionCoordinator(Cluster cluster, Path bundlePath) throws FalconException {
- org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(entity, cluster.getName());
-
- if (feedCluster.getValidity().getEnd().before(new Date())) {
- LOG.warn("Feed Retention is not applicable as Feed's end time for cluster {} is not in the future",
- cluster.getName());
- return null;
- }
-
- return retentionMapper.getRetentionCoordinator(cluster, bundlePath, entity, feedCluster);
- }
-
- private List<COORDINATORAPP> getReplicationCoordinators(Cluster targetCluster, Path bundlePath)
- throws FalconException {
- List<COORDINATORAPP> replicationCoords = new ArrayList<COORDINATORAPP>();
- if (FeedHelper.getCluster(entity, targetCluster.getName()).getType() == ClusterType.TARGET) {
- for (org.apache.falcon.entity.v0.feed.Cluster feedCluster : entity.getClusters().getClusters()) {
- if (feedCluster.getType() == ClusterType.SOURCE) {
- String coordName = EntityUtil.getWorkflowName(Tag.REPLICATION, entity).toString();
- Path coordPath = getCoordPath(bundlePath, coordName);
- Cluster srcCluster = ConfigurationStore.get().get(EntityType.CLUSTER, feedCluster.getName());
-
- // workflow is serialized to a specific dir
- Path sourceSpecificWfPath = new Path(coordPath, srcCluster.getName());
-
- // Different workflow for each source since hive credentials vary for each cluster
- replicationMapper.createReplicationWorkflow(
- targetCluster, srcCluster, sourceSpecificWfPath, coordName);
-
- COORDINATORAPP coord = replicationMapper.createAndGetCoord(
- entity, srcCluster, targetCluster, sourceSpecificWfPath);
-
- if (coord != null) {
- replicationCoords.add(coord);
- }
- }
- }
- }
-
- return replicationCoords;
- }
-
- @Override
- protected Map<String, String> getEntityProperties() {
- Map<String, String> props = new HashMap<String, String>();
- if (entity.getProperties() != null) {
- for (Property prop : entity.getProperties().getProperties()) {
- props.put(prop.getName(), prop.getValue());
- }
- }
- return props;
- }
-
- private final class RetentionOozieWorkflowMapper {
-
- private static final String RETENTION_WF_TEMPLATE = "/config/workflow/retention-workflow.xml";
-
- private COORDINATORAPP getRetentionCoordinator(Cluster cluster, Path bundlePath, Feed feed,
- org.apache.falcon.entity.v0.feed.Cluster feedCluster) throws FalconException {
- COORDINATORAPP retentionApp = new COORDINATORAPP();
- String coordName = EntityUtil.getWorkflowName(Tag.RETENTION, feed).toString();
- retentionApp.setName(coordName);
- retentionApp.setEnd(SchemaHelper.formatDateUTC(feedCluster.getValidity().getEnd()));
- retentionApp.setStart(SchemaHelper.formatDateUTC(new Date()));
- retentionApp.setTimezone(feed.getTimezone().getID());
- TimeUnit timeUnit = feed.getFrequency().getTimeUnit();
- if (timeUnit == TimeUnit.hours || timeUnit == TimeUnit.minutes) {
- retentionApp.setFrequency("${coord:hours(6)}");
- } else {
- retentionApp.setFrequency("${coord:days(1)}");
- }
-
- Path wfPath = getCoordPath(bundlePath, coordName);
- retentionApp.setAction(getRetentionWorkflowAction(cluster, wfPath, coordName));
- return retentionApp;
- }
-
- private ACTION getRetentionWorkflowAction(Cluster cluster, Path wfPath, String wfName)
- throws FalconException {
- ACTION retentionAction = new ACTION();
- WORKFLOW retentionWorkflow = new WORKFLOW();
- createRetentionWorkflow(cluster, wfPath, wfName);
- retentionWorkflow.setAppPath(getStoragePath(wfPath.toString()));
-
- Map<String, String> props = createCoordDefaultConfiguration(cluster, wfName);
- props.put("timeZone", entity.getTimezone().getID());
- props.put("frequency", entity.getFrequency().getTimeUnit().name());
-
- final Storage storage = FeedHelper.createStorage(cluster, entity);
- props.put("falconFeedStorageType", storage.getType().name());
-
- String feedDataPath = storage.getUriTemplate();
- props.put("feedDataPath",
- feedDataPath.replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX));
-
- org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(entity, cluster.getName());
- props.put("limit", feedCluster.getRetention().getLimit().toString());
-
- props.put(ARG.operation.getPropName(), EntityOps.DELETE.name());
- props.put(ARG.feedNames.getPropName(), entity.getName());
- props.put(ARG.feedInstancePaths.getPropName(), IGNORE);
-
- props.put("falconInputFeeds", entity.getName());
- props.put("falconInPaths", IGNORE);
-
- propagateUserWorkflowProperties(props, "eviction");
-
- retentionWorkflow.setConfiguration(getCoordConfig(props));
- retentionAction.setWorkflow(retentionWorkflow);
- return retentionAction;
- }
-
- private void createRetentionWorkflow(Cluster cluster, Path wfPath, String wfName) throws FalconException {
- try {
- WORKFLOWAPP retWfApp = getWorkflowTemplate(RETENTION_WF_TEMPLATE);
- retWfApp.setName(wfName);
- addLibExtensionsToWorkflow(cluster, retWfApp, EntityType.FEED, "retention");
- addOozieRetries(retWfApp);
-
- if (shouldSetupHiveConfiguration(cluster, entity)) {
- setupHiveCredentials(cluster, wfPath, retWfApp);
- }
-
- marshal(cluster, retWfApp, wfPath);
- } catch(IOException e) {
- throw new FalconException("Unable to create retention workflow", e);
- }
- }
-
- private void setupHiveCredentials(Cluster cluster, Path wfPath,
- WORKFLOWAPP workflowApp) throws FalconException {
- if (isSecurityEnabled) {
- // add hcatalog credentials for secure mode and add a reference to each action
- addHCatalogCredentials(workflowApp, cluster, HIVE_CREDENTIAL_NAME);
- }
-
- // create hive-site.xml file so actions can use it in the classpath
- createHiveConfiguration(cluster, wfPath, ""); // no prefix since only one hive instance
-
- for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
- if (!(object instanceof org.apache.falcon.oozie.workflow.ACTION)) {
- continue;
- }
-
- org.apache.falcon.oozie.workflow.ACTION action =
- (org.apache.falcon.oozie.workflow.ACTION) object;
- String actionName = action.getName();
- if ("eviction".equals(actionName)) {
- // add reference to hive-site conf to each action
- action.getJava().setJobXml("${wf:appPath()}/conf/hive-site.xml");
-
- if (isSecurityEnabled) {
- // add a reference to credential in the action
- action.setCred(HIVE_CREDENTIAL_NAME);
- }
- }
- }
- }
- }
-
- private class ReplicationOozieWorkflowMapper {
- private static final String MR_MAX_MAPS = "maxMaps";
- private static final String MR_MAP_BANDWIDTH = "mapBandwidthKB";
-
- private static final int THIRTY_MINUTES = 30 * 60 * 1000;
-
- private static final String REPLICATION_COORD_TEMPLATE = "/config/coordinator/replication-coordinator.xml";
- private static final String REPLICATION_WF_TEMPLATE = "/config/workflow/replication-workflow.xml";
-
- private static final String TIMEOUT = "timeout";
- private static final String PARALLEL = "parallel";
-
- private static final String SOURCE_HIVE_CREDENTIAL_NAME = "falconSourceHiveAuth";
- private static final String TARGET_HIVE_CREDENTIAL_NAME = "falconTargetHiveAuth";
-
- /**
- * This method is called for each source serializing a workflow for each source per
- * target. Additionally, hive credentials are recorded in the workflow definition.
- *
- * @param targetCluster target cluster
- * @param sourceCluster source cluster
- * @param wfPath workflow path
- * @param wfName workflow name
- * @throws FalconException
- */
- private void createReplicationWorkflow(Cluster targetCluster, Cluster sourceCluster,
- Path wfPath, String wfName) throws FalconException {
- WORKFLOWAPP repWFapp = getWorkflowTemplate(REPLICATION_WF_TEMPLATE);
- repWFapp.setName(wfName);
-
- try {
- addLibExtensionsToWorkflow(targetCluster, repWFapp, EntityType.FEED, "replication");
- } catch (IOException e) {
- throw new FalconException("Unable to add lib extensions to workflow", e);
- }
-
- addOozieRetries(repWFapp);
-
- if (shouldSetupHiveConfiguration(targetCluster, entity)) {
- setupHiveCredentials(targetCluster, sourceCluster, repWFapp);
- }
-
- marshal(targetCluster, repWFapp, wfPath);
- }
-
- private void setupHiveCredentials(Cluster targetCluster, Cluster sourceCluster,
- WORKFLOWAPP workflowApp) {
- if (isSecurityEnabled) {
- // add hcatalog credentials for secure mode and add a reference to each action
- addHCatalogCredentials(workflowApp, sourceCluster, SOURCE_HIVE_CREDENTIAL_NAME);
- addHCatalogCredentials(workflowApp, targetCluster, TARGET_HIVE_CREDENTIAL_NAME);
- }
-
- // hive-site.xml file is created later in coordinator initialization but
- // actions are set to point to that here
-
- for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
- if (!(object instanceof org.apache.falcon.oozie.workflow.ACTION)) {
- continue;
- }
-
- org.apache.falcon.oozie.workflow.ACTION action =
- (org.apache.falcon.oozie.workflow.ACTION) object;
- String actionName = action.getName();
- if ("recordsize".equals(actionName)) {
- // add reference to hive-site conf to each action
- action.getJava().setJobXml("${wf:appPath()}/conf/falcon-source-hive-site.xml");
-
- if (isSecurityEnabled) { // add a reference to credential in the action
- action.setCred(SOURCE_HIVE_CREDENTIAL_NAME);
- }
- } else if ("table-export".equals(actionName)) {
- if (isSecurityEnabled) { // add a reference to credential in the action
- action.setCred(SOURCE_HIVE_CREDENTIAL_NAME);
- }
- } else if ("table-import".equals(actionName)) {
- if (isSecurityEnabled) { // add a reference to credential in the action
- action.setCred(TARGET_HIVE_CREDENTIAL_NAME);
- }
- }
- }
- }
-
- private COORDINATORAPP createAndGetCoord(Feed feed, Cluster srcCluster, Cluster trgCluster,
- Path wfPath) throws FalconException {
- long replicationDelayInMillis = getReplicationDelayInMillis(feed, srcCluster);
- Date sourceStartDate = getStartDate(feed, srcCluster, replicationDelayInMillis);
- Date sourceEndDate = getEndDate(feed, srcCluster);
-
- Date targetStartDate = getStartDate(feed, trgCluster, replicationDelayInMillis);
- Date targetEndDate = getEndDate(feed, trgCluster);
-
- if (noOverlapExists(sourceStartDate, sourceEndDate,
- targetStartDate, targetEndDate)) {
- LOG.warn("Not creating replication coordinator, as the source cluster: {} and target cluster: {} do "
- + "not have overlapping dates", srcCluster.getName(), trgCluster.getName());
- return null;
- }
-
- COORDINATORAPP replicationCoord;
- try {
- replicationCoord = getCoordinatorTemplate(REPLICATION_COORD_TEMPLATE);
- } catch (FalconException e) {
- throw new FalconException("Cannot unmarshall replication coordinator template", e);
- }
-
- String coordName = EntityUtil.getWorkflowName(
- Tag.REPLICATION, Arrays.asList(srcCluster.getName()), feed).toString();
- String start = sourceStartDate.after(targetStartDate)
- ? SchemaHelper.formatDateUTC(sourceStartDate) : SchemaHelper.formatDateUTC(targetStartDate);
- String end = sourceEndDate.before(targetEndDate)
- ? SchemaHelper.formatDateUTC(sourceEndDate) : SchemaHelper.formatDateUTC(targetEndDate);
-
- initializeCoordAttributes(replicationCoord, coordName, feed, start, end, replicationDelayInMillis);
- setCoordControls(feed, replicationCoord);
-
- final Storage sourceStorage = FeedHelper.createReadOnlyStorage(srcCluster, feed);
- initializeInputDataSet(feed, srcCluster, replicationCoord, sourceStorage);
-
- final Storage targetStorage = FeedHelper.createStorage(trgCluster, feed);
- initializeOutputDataSet(feed, trgCluster, replicationCoord, targetStorage);
-
- ACTION replicationWorkflowAction = getReplicationWorkflowAction(
- srcCluster, trgCluster, wfPath, coordName, sourceStorage, targetStorage);
- replicationCoord.setAction(replicationWorkflowAction);
-
- return replicationCoord;
- }
-
- private Date getStartDate(Feed feed, Cluster cluster, long replicationDelayInMillis) {
- Date startDate = FeedHelper.getCluster(feed, cluster.getName()).getValidity().getStart();
- return replicationDelayInMillis == 0 ? startDate : new Date(startDate.getTime() + replicationDelayInMillis);
- }
-
- private Date getEndDate(Feed feed, Cluster cluster) {
- return FeedHelper.getCluster(feed, cluster.getName()).getValidity().getEnd();
- }
-
- private boolean noOverlapExists(Date sourceStartDate, Date sourceEndDate,
- Date targetStartDate, Date targetEndDate) {
- return sourceStartDate.after(targetEndDate) || targetStartDate.after(sourceEndDate);
- }
-
- private void initializeCoordAttributes(COORDINATORAPP replicationCoord, String coordName,
- Feed feed, String start, String end, long delayInMillis) {
- replicationCoord.setName(coordName);
- replicationCoord.setFrequency("${coord:" + feed.getFrequency().toString() + "}");
-
- if (delayInMillis > 0) {
- long delayInMins = -1 * delayInMillis / (1000 * 60);
- String elExp = "${now(0," + delayInMins + ")}";
-
- replicationCoord.getInputEvents().getDataIn().get(0).getInstance().set(0, elExp);
- replicationCoord.getOutputEvents().getDataOut().get(0).setInstance(elExp);
- }
-
- replicationCoord.setStart(start);
- replicationCoord.setEnd(end);
- replicationCoord.setTimezone(feed.getTimezone().getID());
- }
-
- private long getReplicationDelayInMillis(Feed feed, Cluster srcCluster) throws FalconException {
- Frequency replicationDelay = FeedHelper.getCluster(feed, srcCluster.getName()).getDelay();
- long delayInMillis=0;
- if (replicationDelay != null) {
- delayInMillis = ExpressionHelper.get().evaluate(
- replicationDelay.toString(), Long.class);
- }
-
- return delayInMillis;
- }
-
- private void setCoordControls(Feed feed, COORDINATORAPP replicationCoord) throws FalconException {
- long frequencyInMillis = ExpressionHelper.get().evaluate(
- feed.getFrequency().toString(), Long.class);
- long timeoutInMillis = frequencyInMillis * 6;
- if (timeoutInMillis < THIRTY_MINUTES) {
- timeoutInMillis = THIRTY_MINUTES;
- }
-
- Map<String, String> props = getEntityProperties();
- String timeout = props.get(TIMEOUT);
- if (timeout!=null) {
- try{
- timeoutInMillis= ExpressionHelper.get().evaluate(timeout, Long.class);
- } catch (Exception ignore) {
- LOG.error("Unable to evaluate timeout:", ignore);
- }
- }
- replicationCoord.getControls().setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));
- replicationCoord.getControls().setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
-
- String parallelProp = props.get(PARALLEL);
- int parallel = 1;
- if (parallelProp != null) {
- try {
- parallel = Integer.parseInt(parallelProp);
- } catch (NumberFormatException ignore) {
- LOG.error("Unable to parse parallel:", ignore);
- }
- }
- replicationCoord.getControls().setConcurrency(String.valueOf(parallel));
- }
-
- private void initializeInputDataSet(Feed feed, Cluster srcCluster, COORDINATORAPP replicationCoord,
- Storage sourceStorage) throws FalconException {
- SYNCDATASET inputDataset = (SYNCDATASET)
- replicationCoord.getDatasets().getDatasetOrAsyncDataset().get(0);
-
- String uriTemplate = sourceStorage.getUriTemplate(LocationType.DATA);
- if (sourceStorage.getType() == Storage.TYPE.TABLE) {
- uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
- }
- inputDataset.setUriTemplate(uriTemplate);
-
- setDatasetValues(inputDataset, feed, srcCluster);
-
- if (feed.getAvailabilityFlag() == null) {
- inputDataset.setDoneFlag("");
- } else {
- inputDataset.setDoneFlag(feed.getAvailabilityFlag());
- }
- }
-
- private void initializeOutputDataSet(Feed feed, Cluster targetCluster, COORDINATORAPP replicationCoord,
- Storage targetStorage) throws FalconException {
- SYNCDATASET outputDataset = (SYNCDATASET)
- replicationCoord.getDatasets().getDatasetOrAsyncDataset().get(1);
-
- String uriTemplate = targetStorage.getUriTemplate(LocationType.DATA);
- if (targetStorage.getType() == Storage.TYPE.TABLE) {
- uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
- }
- outputDataset.setUriTemplate(uriTemplate);
-
- setDatasetValues(outputDataset, feed, targetCluster);
- }
-
- private void setDatasetValues(SYNCDATASET dataset, Feed feed, Cluster cluster) {
- dataset.setInitialInstance(SchemaHelper.formatDateUTC(
- FeedHelper.getCluster(feed, cluster.getName()).getValidity().getStart()));
- dataset.setTimezone(feed.getTimezone().getID());
- dataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");
- }
-
- private ACTION getReplicationWorkflowAction(Cluster srcCluster, Cluster trgCluster, Path wfPath,
- String wfName, Storage sourceStorage,
- Storage targetStorage) throws FalconException {
- ACTION replicationAction = new ACTION();
- WORKFLOW replicationWF = new WORKFLOW();
-
- replicationWF.setAppPath(getStoragePath(wfPath.toString()));
- Map<String, String> props = createCoordDefaultConfiguration(trgCluster, wfName);
- props.put("srcClusterName", srcCluster.getName());
- props.put("srcClusterColo", srcCluster.getColo());
- if (props.get(MR_MAX_MAPS) == null) { // set default if user has not overridden
- props.put(MR_MAX_MAPS, getDefaultMaxMaps());
- }
- if (props.get(MR_MAP_BANDWIDTH) == null) { // set default if user has not overridden
- props.put(MR_MAP_BANDWIDTH, getDefaultMapBandwidth());
- }
-
- // the storage type is uniform across source and target feeds for replication
- props.put("falconFeedStorageType", sourceStorage.getType().name());
-
- String instancePaths = null;
- if (sourceStorage.getType() == Storage.TYPE.FILESYSTEM) {
- String pathsWithPartitions = getPathsWithPartitions(srcCluster, trgCluster, entity);
- instancePaths = pathsWithPartitions;
-
- propagateFileSystemCopyProperties(pathsWithPartitions, props);
- } else if (sourceStorage.getType() == Storage.TYPE.TABLE) {
- instancePaths = "${coord:dataIn('input')}";
-
- final CatalogStorage sourceTableStorage = (CatalogStorage) sourceStorage;
- propagateTableStorageProperties(srcCluster, sourceTableStorage, props, "falconSource");
- final CatalogStorage targetTableStorage = (CatalogStorage) targetStorage;
- propagateTableStorageProperties(trgCluster, targetTableStorage, props, "falconTarget");
- propagateTableCopyProperties(srcCluster, sourceTableStorage,
- trgCluster, targetTableStorage, props);
- setupHiveConfiguration(srcCluster, trgCluster, wfPath);
- }
-
- propagateLateDataProperties(entity, instancePaths, sourceStorage.getType().name(), props);
- propagateUserWorkflowProperties(props, "replication");
-
- replicationWF.setConfiguration(getCoordConfig(props));
- replicationAction.setWorkflow(replicationWF);
-
- return replicationAction;
- }
-
- private String getDefaultMaxMaps() {
- return RuntimeProperties.get().getProperty("falcon.replication.workflow.maxmaps", "5");
- }
-
- private String getDefaultMapBandwidth() {
- return RuntimeProperties.get().getProperty("falcon.replication.workflow.mapbandwidthKB", "102400");
- }
-
- private String getPathsWithPartitions(Cluster srcCluster, Cluster trgCluster,
- Feed feed) throws FalconException {
- String srcPart = FeedHelper.normalizePartitionExpression(
- FeedHelper.getCluster(feed, srcCluster.getName()).getPartition());
- srcPart = FeedHelper.evaluateClusterExp(srcCluster, srcPart);
-
- String targetPart = FeedHelper.normalizePartitionExpression(
- FeedHelper.getCluster(feed, trgCluster.getName()).getPartition());
- targetPart = FeedHelper.evaluateClusterExp(trgCluster, targetPart);
-
- StringBuilder pathsWithPartitions = new StringBuilder();
- pathsWithPartitions.append("${coord:dataIn('input')}/")
- .append(FeedHelper.normalizePartitionExpression(srcPart, targetPart));
-
- String parts = pathsWithPartitions.toString().replaceAll("//+", "/");
- parts = StringUtils.stripEnd(parts, "/");
- return parts;
- }
-
- private void propagateFileSystemCopyProperties(String pathsWithPartitions,
- Map<String, String> props) throws FalconException {
- props.put("sourceRelativePaths", pathsWithPartitions);
-
- props.put("distcpSourcePaths", "${coord:dataIn('input')}");
- props.put("distcpTargetPaths", "${coord:dataOut('output')}");
- }
-
- private void propagateTableStorageProperties(Cluster cluster, CatalogStorage tableStorage,
- Map<String, String> props, String prefix) {
- props.put(prefix + "NameNode", ClusterHelper.getStorageUrl(cluster));
- props.put(prefix + "JobTracker", ClusterHelper.getMREndPoint(cluster));
- props.put(prefix + "HcatNode", tableStorage.getCatalogUrl());
-
- props.put(prefix + "Database", tableStorage.getDatabase());
- props.put(prefix + "Table", tableStorage.getTable());
- props.put(prefix + "Partition", "(${coord:dataInPartitions('input', 'hive-export')})");
- }
-
- private void setupHiveConfiguration(Cluster srcCluster, Cluster trgCluster,
- Path wfPath) throws FalconException {
- Configuration conf = ClusterHelper.getConfiguration(trgCluster);
- FileSystem fs = HadoopClientFactory.get().createFileSystem(conf);
-
- try {
- // copy import export scripts to stagingDir
- Path scriptPath = new Path(wfPath, "scripts");
- copyHiveScript(fs, scriptPath, "/config/workflow/", "falcon-table-export.hql");
- copyHiveScript(fs, scriptPath, "/config/workflow/", "falcon-table-import.hql");
-
- // create hive conf to stagingDir
- Path confPath = new Path(wfPath + "/conf");
- persistHiveConfiguration(fs, confPath, srcCluster, "falcon-source-");
- persistHiveConfiguration(fs, confPath, trgCluster, "falcon-target-");
- } catch (IOException e) {
- throw new FalconException("Unable to create hive conf files", e);
- }
- }
-
- private void copyHiveScript(FileSystem fs, Path scriptPath,
- String localScriptPath, String scriptName) throws IOException {
- OutputStream out = null;
- InputStream in = null;
- try {
- out = fs.create(new Path(scriptPath, scriptName));
- in = OozieFeedWorkflowBuilder.class.getResourceAsStream(localScriptPath + scriptName);
- IOUtils.copy(in, out);
- } finally {
- IOUtils.closeQuietly(in);
- IOUtils.closeQuietly(out);
- }
- }
-
- private void propagateTableCopyProperties(Cluster srcCluster, CatalogStorage sourceStorage,
- Cluster trgCluster, CatalogStorage targetStorage,
- Map<String, String> props) {
- // create staging dirs for export at source & set it as distcpSourcePaths
- String sourceStagingPath =
- FeedHelper.getStagingPath(srcCluster, entity, sourceStorage, Tag.REPLICATION,
- NOMINAL_TIME_EL + "/" + trgCluster.getName());
- props.put("distcpSourcePaths", sourceStagingPath);
-
- // create staging dirs for import at target & set it as distcpTargetPaths
- String targetStagingPath =
- FeedHelper.getStagingPath(trgCluster, entity, targetStorage, Tag.REPLICATION,
- NOMINAL_TIME_EL + "/" + trgCluster.getName());
- props.put("distcpTargetPaths", targetStagingPath);
-
- props.put("sourceRelativePaths", IGNORE); // this will bot be used for Table storage.
- }
-
- private void propagateLateDataProperties(Feed feed, String instancePaths,
- String falconFeedStorageType, Map<String, String> props) {
- // todo these pairs are the same but used in different context
- // late data handler - should-record action
- props.put("falconInputFeeds", feed.getName());
- props.put("falconInPaths", instancePaths);
-
- // storage type for each corresponding feed - in this case only one feed is involved
- // needed to compute usage based on storage type in LateDataHandler
- props.put("falconInputFeedStorageTypes", falconFeedStorageType);
-
- // falcon post processing
- props.put(ARG.feedNames.getPropName(), feed.getName());
- props.put(ARG.feedInstancePaths.getPropName(), "${coord:dataOut('output')}");
- }
- }
-
- private void addOozieRetries(WORKFLOWAPP workflow) {
- for (Object object : workflow.getDecisionOrForkOrJoin()) {
- if (!(object instanceof org.apache.falcon.oozie.workflow.ACTION)) {
- continue;
- }
- org.apache.falcon.oozie.workflow.ACTION action = (org.apache.falcon.oozie.workflow.ACTION) object;
- String actionName = action.getName();
- if (FALCON_ACTIONS.contains(actionName)) {
- decorateWithOozieRetries(action);
- }
- }
- }
-
- private void propagateUserWorkflowProperties(Map<String, String> props, String policy) {
- props.put("userWorkflowName", policy + "-policy");
- props.put("userWorkflowEngine", "falcon");
-
- String version;
- try {
- version = BuildProperties.get().getProperty("build.version");
- } catch (Exception e) { // unfortunate that this is only available in prism/webapp
- version = "0.5";
- }
- props.put("userWorkflowVersion", version);
- }
-
- protected boolean shouldSetupHiveConfiguration(Cluster cluster,
- Feed feed) throws FalconException {
- Storage.TYPE storageType = FeedHelper.getStorageType(feed, cluster);
- return Storage.TYPE.TABLE == storageType;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/main/resources/config/coordinator/replication-coordinator.xml
----------------------------------------------------------------------
diff --git a/feed/src/main/resources/config/coordinator/replication-coordinator.xml b/feed/src/main/resources/config/coordinator/replication-coordinator.xml
deleted file mode 100644
index 693b0bd..0000000
--- a/feed/src/main/resources/config/coordinator/replication-coordinator.xml
+++ /dev/null
@@ -1,51 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<coordinator-app name="#NAME#" frequency="#FREQUENCY#"
- start="#START_TIME#" end="#END_TIME" timezone="#TIMEZONE#"
- xmlns="uri:oozie:coordinator:0.3">
- <controls>
- <concurrency>1</concurrency>
- <execution>FIFO</execution>
- </controls>
- <datasets>
- <dataset name="input-dataset" frequency="#FEED_FREQ#"
- initial-instance="#START_TIME#" timezone="#TIMEZONE#">
- <uri-template>#FEED_PATH#</uri-template>
- </dataset>
- <dataset name="output-dataset" frequency="#FEED_FREQ#"
- initial-instance="#START_TIME#" timezone="#TIMEZONE#">
- <uri-template>#FEED_PATH#</uri-template>
- </dataset>
- </datasets>
- <input-events>
- <data-in name="input" dataset="input-dataset">
- <instance>${coord:current(0)}</instance>
- </data-in>
- </input-events>
- <output-events>
- <data-out name="output" dataset="output-dataset">
- <instance>${coord:current(0)}</instance>
- </data-out>
- </output-events>
- <action>
- <workflow>
- <app-path>#WF_PATH#</app-path>
- <configuration/>
- </workflow>
- </action>
-</coordinator-app>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/main/resources/config/workflow/falcon-table-export.hql
----------------------------------------------------------------------
diff --git a/feed/src/main/resources/config/workflow/falcon-table-export.hql b/feed/src/main/resources/config/workflow/falcon-table-export.hql
deleted file mode 100644
index 37fd1b7..0000000
--- a/feed/src/main/resources/config/workflow/falcon-table-export.hql
+++ /dev/null
@@ -1,18 +0,0 @@
---
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements. See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership. The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License. You may obtain a copy of the License at
---
--- http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
-export table ${falconSourceDatabase}.${falconSourceTable} partition ${falconSourcePartition} to '${falconSourceStagingDir}';
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/main/resources/config/workflow/falcon-table-import.hql
----------------------------------------------------------------------
diff --git a/feed/src/main/resources/config/workflow/falcon-table-import.hql b/feed/src/main/resources/config/workflow/falcon-table-import.hql
deleted file mode 100644
index 653d580..0000000
--- a/feed/src/main/resources/config/workflow/falcon-table-import.hql
+++ /dev/null
@@ -1,20 +0,0 @@
---
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements. See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership. The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License. You may obtain a copy of the License at
---
--- http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
-use ${falconTargetDatabase};
-alter table ${falconTargetTable} drop if exists partition ${falconTargetPartition};
-import table ${falconTargetTable} partition ${falconTargetPartition} from '${falconTargetStagingDir}';
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/feed/src/main/resources/config/workflow/replication-workflow.xml
----------------------------------------------------------------------
diff --git a/feed/src/main/resources/config/workflow/replication-workflow.xml b/feed/src/main/resources/config/workflow/replication-workflow.xml
deleted file mode 100644
index 0748acf..0000000
--- a/feed/src/main/resources/config/workflow/replication-workflow.xml
+++ /dev/null
@@ -1,330 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-feed-parent-workflow'>
- <start to='should-record'/>
- <decision name='should-record'>
- <switch>
- <case to="recordsize">
- ${shouldRecord=="true"}
- </case>
- <default to="replication-decision"/>
- </switch>
- </decision>
- <action name='recordsize'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- <!-- HCatalog jars -->
- <property>
- <name>oozie.action.sharelib.for.java</name>
- <value>hcatalog</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.latedata.LateDataHandler</main-class>
- <arg>-out</arg>
- <arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg>
- <arg>-paths</arg>
- <arg>${falconInPaths}</arg>
- <arg>-falconInputFeeds</arg>
- <arg>${falconInputFeeds}</arg>
- <arg>-falconInputFeedStorageTypes</arg>
- <arg>${falconInputFeedStorageTypes}</arg>
- <capture-output/>
- </java>
- <ok to="replication-decision"/>
- <error to="failed-post-processing"/>
- </action>
- <decision name="replication-decision">
- <switch>
- <case to="table-export">
- ${falconFeedStorageType == "TABLE"}
- </case>
- <default to="replication"/>
- </switch>
- </decision>
- <!-- Table Replication - Export data and metadata to HDFS Staging from Source Hive -->
- <action name="table-export">
- <hive xmlns="uri:oozie:hive-action:0.2">
- <job-tracker>${falconSourceJobTracker}</job-tracker>
- <name-node>${falconSourceNameNode}</name-node>
- <prepare>
- <delete path="${distcpSourcePaths}"/>
- </prepare>
- <job-xml>${wf:appPath()}/conf/falcon-source-hive-site.xml</job-xml>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <script>${wf:appPath()}/scripts/falcon-table-export.hql</script>
- <param>falconSourceDatabase=${falconSourceDatabase}</param>
- <param>falconSourceTable=${falconSourceTable}</param>
- <param>falconSourcePartition=${falconSourcePartition}</param>
- <param>falconSourceStagingDir=${distcpSourcePaths}</param>
- </hive>
- <ok to="replication"/>
- <error to="failed-post-processing"/>
- </action>
- <!-- Replication action -->
- <action name="replication">
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property> <!-- hadoop 2 parameter -->
- <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
- <value>true</value>
- </property>
- <property> <!-- hadoop 1 parameter -->
- <name>oozie.launcher.mapreduce.user.classpath.first</name>
- <value>true</value>
- </property>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.replication.FeedReplicator</main-class>
- <arg>-Dfalcon.include.path=${sourceRelativePaths}</arg>
- <arg>-Dmapred.job.queue.name=${queueName}</arg>
- <arg>-Dmapred.job.priority=${jobPriority}</arg>
- <arg>-maxMaps</arg>
- <arg>${maxMaps}</arg>
- <arg>-mapBandwidthKB</arg>
- <arg>${mapBandwidthKB}</arg>
- <arg>-sourcePaths</arg>
- <arg>${distcpSourcePaths}</arg>
- <arg>-targetPath</arg>
- <arg>${distcpTargetPaths}</arg>
- <arg>-falconFeedStorageType</arg>
- <arg>${falconFeedStorageType}</arg>
- <file>${wf:conf("falcon.libpath")}/hadoop-distcp.jar</file>
- </java>
- <ok to="post-replication-decision"/>
- <error to="failed-post-processing"/>
- </action>
- <decision name="post-replication-decision">
- <switch>
- <case to="table-import">
- ${falconFeedStorageType == "TABLE"}
- </case>
- <default to="succeeded-post-processing"/>
- </switch>
- </decision>
- <!-- Table Replication - Import data and metadata from HDFS Staging into Target Hive -->
- <action name="table-import">
- <hive xmlns="uri:oozie:hive-action:0.2">
- <job-tracker>${falconTargetJobTracker}</job-tracker>
- <name-node>${falconTargetNameNode}</name-node>
- <job-xml>${wf:appPath()}/conf/falcon-target-hive-site.xml</job-xml>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <script>${wf:appPath()}/scripts/falcon-table-import.hql</script>
- <param>falconTargetDatabase=${falconTargetDatabase}</param>
- <param>falconTargetTable=${falconTargetTable}</param>
- <param>falconTargetPartition=${falconTargetPartition}</param>
- <param>falconTargetStagingDir=${distcpTargetPaths}</param>
- </hive>
- <ok to="cleanup-table-staging-dir"/>
- <error to="failed-post-processing"/>
- </action>
- <action name="cleanup-table-staging-dir">
- <fs>
- <delete path="${distcpSourcePaths}"/>
- <delete path="${distcpTargetPaths}"/>
- </fs>
- <ok to="succeeded-post-processing"/>
- <error to="failed-post-processing"/>
- </action>
- <action name='succeeded-post-processing'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
- <arg>-cluster</arg>
- <arg>${cluster}</arg>
- <arg>-entityType</arg>
- <arg>${entityType}</arg>
- <arg>-entityName</arg>
- <arg>${entityName}</arg>
- <arg>-nominalTime</arg>
- <arg>${nominalTime}</arg>
- <arg>-operation</arg>
- <arg>REPLICATE</arg>
- <arg>-workflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-runId</arg>
- <arg>${wf:run()}</arg>
- <arg>-status</arg>
- <arg>SUCCEEDED</arg>
- <arg>-timeStamp</arg>
- <arg>${timeStamp}</arg>
- <arg>-brokerImplClass</arg>
- <arg>${wf:conf("broker.impl.class")}</arg>
- <arg>-brokerUrl</arg>
- <arg>${wf:conf("broker.url")}</arg>
- <arg>-userBrokerImplClass</arg>
- <arg>${userBrokerImplClass}</arg>
- <arg>-userBrokerUrl</arg>
- <arg>${userBrokerUrl}</arg>
- <arg>-brokerTTL</arg>
- <arg>${wf:conf("broker.ttlInMins")}</arg>
- <arg>-feedNames</arg>
- <arg>${feedNames}</arg>
- <arg>-feedInstancePaths</arg>
- <arg>${feedInstancePaths}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}-${srcClusterName}.csv</arg>
- <arg>-workflowEngineUrl</arg>
- <arg>${workflowEngineUrl}</arg>
- <arg>-userWorkflowName</arg>
- <arg>${userWorkflowName}</arg>
- <arg>-userWorkflowVersion</arg>
- <arg>${userWorkflowVersion}</arg>
- <arg>-userWorkflowEngine</arg>
- <arg>${userWorkflowEngine}</arg>
- <arg>-subflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-logDir</arg>
- <arg>${logDir}/job-${nominalTime}/${srcClusterName}/</arg>
- <arg>-workflowUser</arg>
- <arg>${wf:user()}</arg>
- <arg>-falconInputFeeds</arg>
- <arg>${falconInputFeeds}</arg>
- <arg>-falconInputPaths</arg>
- <arg>${falconInPaths}</arg>
- <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
- <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
- <file>${wf:conf("falcon.libpath")}/jms.jar</file>
- <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
- <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
- <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
- </java>
- <ok to="end"/>
- <error to="fail"/>
- </action>
- <action name='failed-post-processing'>
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
- <arg>-cluster</arg>
- <arg>${cluster}</arg>
- <arg>-entityType</arg>
- <arg>${entityType}</arg>
- <arg>-entityName</arg>
- <arg>${entityName}</arg>
- <arg>-nominalTime</arg>
- <arg>${nominalTime}</arg>
- <arg>-operation</arg>
- <arg>REPLICATE</arg>
- <arg>-workflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-runId</arg>
- <arg>${wf:run()}</arg>
- <arg>-status</arg>
- <arg>FAILED</arg>
- <arg>-timeStamp</arg>
- <arg>${timeStamp}</arg>
- <arg>-brokerImplClass</arg>
- <arg>${wf:conf("broker.impl.class")}</arg>
- <arg>-brokerUrl</arg>
- <arg>${wf:conf("broker.url")}</arg>
- <arg>-userBrokerImplClass</arg>
- <arg>${userBrokerImplClass}</arg>
- <arg>-userBrokerUrl</arg>
- <arg>${userBrokerUrl}</arg>
- <arg>-brokerTTL</arg>
- <arg>${wf:conf("broker.ttlInMins")}</arg>
- <arg>-feedNames</arg>
- <arg>${feedNames}</arg>
- <arg>-feedInstancePaths</arg>
- <arg>${feedInstancePaths}</arg>
- <arg>-logFile</arg>
- <arg>${logDir}/instancePaths-${nominalTime}-${srcClusterName}.csv</arg>
- <arg>-workflowEngineUrl</arg>
- <arg>${workflowEngineUrl}</arg>
- <arg>-subflowId</arg>
- <arg>${wf:id()}</arg>
- <arg>-logDir</arg>
- <arg>${logDir}/job-${nominalTime}/${srcClusterName}/</arg>
- <arg>-workflowUser</arg>
- <arg>${wf:user()}</arg>
- <file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
- <file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
- <file>${wf:conf("falcon.libpath")}/jms.jar</file>
- <file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
- <file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
- <file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
- </java>
- <ok to="fail"/>
- <error to="fail"/>
- </action>
- <kill name="fail">
- <message>
- Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- </message>
- </kill>
- <end name='end'/>
-</workflow-app>
[4/9] FALCON-369 Refactor workflow builder. Contributed by Shwetha GS
Posted by sh...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilderTest.java
----------------------------------------------------------------------
diff --git a/oozie/src/test/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilderTest.java b/oozie/src/test/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilderTest.java
new file mode 100644
index 0000000..5ceea75
--- /dev/null
+++ b/oozie/src/test/java/org/apache/falcon/oozie/process/OozieProcessWorkflowBuilderTest.java
@@ -0,0 +1,767 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.oozie.process;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.Tag;
+import org.apache.falcon.cluster.util.EmbeddedCluster;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.Frequency;
+import org.apache.falcon.entity.v0.SchemaHelper;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.cluster.Interfacetype;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.entity.v0.process.Input;
+import org.apache.falcon.entity.v0.process.Output;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.entity.v0.process.Validity;
+import org.apache.falcon.entity.v0.process.Workflow;
+import org.apache.falcon.messaging.EntityInstanceMessage;
+import org.apache.falcon.oozie.OozieEntityBuilder;
+import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
+import org.apache.falcon.oozie.bundle.BUNDLEAPP;
+import org.apache.falcon.oozie.coordinator.CONFIGURATION.Property;
+import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
+import org.apache.falcon.oozie.coordinator.SYNCDATASET;
+import org.apache.falcon.oozie.workflow.ACTION;
+import org.apache.falcon.oozie.workflow.DECISION;
+import org.apache.falcon.oozie.workflow.PIG;
+import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
+import org.apache.falcon.security.CurrentUser;
+import org.apache.falcon.security.SecurityUtil;
+import org.apache.falcon.util.OozieUtils;
+import org.apache.falcon.util.StartupProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.transform.stream.StreamSource;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * Test for the Falcon entities mapping into Oozie artifacts.
+ */
+public class OozieProcessWorkflowBuilderTest extends AbstractTestBase {
+ private static final String PROCESS_XML = "/config/process/process-0.1.xml";
+ private static final String FEED_XML = "/config/feed/feed-0.1.xml";
+ private static final String CLUSTER_XML = "/config/cluster/cluster-0.1.xml";
+ private static final String PIG_PROCESS_XML = "/config/process/pig-process-0.1.xml";
+
+ private String hdfsUrl;
+ private FileSystem fs;
+ private Cluster cluster;
+
+ @BeforeClass
+ public void setUpDFS() throws Exception {
+ CurrentUser.authenticate("falcon");
+
+ Configuration conf = EmbeddedCluster.newCluster("testCluster").getConf();
+ hdfsUrl = conf.get("fs.default.name");
+ }
+
+ private void storeEntity(EntityType type, String name, String resource) throws Exception {
+ storeEntity(type, name, resource, null);
+ }
+
+ @BeforeMethod
+ public void setUp() throws Exception {
+ storeEntity(EntityType.CLUSTER, "corp", CLUSTER_XML);
+ storeEntity(EntityType.FEED, "clicks", FEED_XML);
+ storeEntity(EntityType.FEED, "impressions", FEED_XML);
+ storeEntity(EntityType.FEED, "clicksummary", FEED_XML);
+ storeEntity(EntityType.PROCESS, "clicksummary", PROCESS_XML);
+ storeEntity(EntityType.PROCESS, "pig-process", PIG_PROCESS_XML);
+
+ ConfigurationStore store = ConfigurationStore.get();
+ cluster = store.get(EntityType.CLUSTER, "corp");
+ org.apache.falcon.entity.v0.cluster.Property property =
+ new org.apache.falcon.entity.v0.cluster.Property();
+ property.setName(OozieOrchestrationWorkflowBuilder.METASTORE_KERBEROS_PRINCIPAL);
+ property.setValue("hive/_HOST");
+ cluster.getProperties().getProperties().add(property);
+
+ ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(hdfsUrl);
+ ClusterHelper.getInterface(cluster, Interfacetype.REGISTRY).setEndpoint("thrift://localhost:49083");
+ fs = new Path(hdfsUrl).getFileSystem(EmbeddedCluster.newConfiguration());
+ fs.create(new Path(ClusterHelper.getLocation(cluster, "working"), "libext/PROCESS/ext.jar")).close();
+
+ Process process = store.get(EntityType.PROCESS, "clicksummary");
+ Path wfpath = new Path(process.getWorkflow().getPath());
+ assert new Path(hdfsUrl).getFileSystem(EmbeddedCluster.newConfiguration()).mkdirs(wfpath);
+ }
+
+ public void testDefCoordMap(Process process, COORDINATORAPP coord) throws Exception {
+ assertEquals("FALCON_PROCESS_DEFAULT_" + process.getName(), coord.getName());
+ Validity processValidity = process.getClusters().getClusters().get(0).getValidity();
+ assertEquals(SchemaHelper.formatDateUTC(processValidity.getStart()), coord.getStart());
+ assertEquals(SchemaHelper.formatDateUTC(processValidity.getEnd()), coord.getEnd());
+ assertEquals("${coord:" + process.getFrequency().toString() + "}", coord.getFrequency());
+ assertEquals(process.getTimezone().getID(), coord.getTimezone());
+
+ assertEquals(process.getParallel() + "", coord.getControls().getConcurrency());
+ assertEquals(process.getOrder().name(), coord.getControls().getExecution());
+
+ assertEquals(process.getInputs().getInputs().get(0).getName(),
+ coord.getInputEvents().getDataIn().get(0).getName());
+ assertEquals(process.getInputs().getInputs().get(0).getName(),
+ coord.getInputEvents().getDataIn().get(0).getDataset());
+ assertEquals("${" + process.getInputs().getInputs().get(0).getStart() + "}",
+ coord.getInputEvents().getDataIn().get(0).getStartInstance());
+ assertEquals("${" + process.getInputs().getInputs().get(0).getEnd() + "}",
+ coord.getInputEvents().getDataIn().get(0).getEndInstance());
+
+ assertEquals(process.getInputs().getInputs().get(1).getName(),
+ coord.getInputEvents().getDataIn().get(1).getName());
+ assertEquals(process.getInputs().getInputs().get(1).getName(),
+ coord.getInputEvents().getDataIn().get(1).getDataset());
+ assertEquals("${" + process.getInputs().getInputs().get(1).getStart() + "}",
+ coord.getInputEvents().getDataIn().get(1).getStartInstance());
+ assertEquals("${" + process.getInputs().getInputs().get(1).getEnd() + "}",
+ coord.getInputEvents().getDataIn().get(1).getEndInstance());
+
+ assertEquals(process.getOutputs().getOutputs().get(0).getName() + "stats",
+ coord.getOutputEvents().getDataOut().get(1).getName());
+ assertEquals(process.getOutputs().getOutputs().get(0).getName() + "meta",
+ coord.getOutputEvents().getDataOut().get(2).getName());
+ assertEquals(process.getOutputs().getOutputs().get(0).getName() + "tmp",
+ coord.getOutputEvents().getDataOut().get(3).getName());
+
+ assertEquals(process.getOutputs().getOutputs().get(0).getName(),
+ coord.getOutputEvents().getDataOut().get(0).getName());
+ assertEquals("${" + process.getOutputs().getOutputs().get(0).getInstance() + "}",
+ coord.getOutputEvents().getDataOut().get(0).getInstance());
+ assertEquals(process.getOutputs().getOutputs().get(0).getName(),
+ coord.getOutputEvents().getDataOut().get(0).getDataset());
+
+ assertEquals(6, coord.getDatasets().getDatasetOrAsyncDataset().size());
+
+ ConfigurationStore store = ConfigurationStore.get();
+ Feed feed = store.get(EntityType.FEED, process.getInputs().getInputs().get(0).getFeed());
+ SYNCDATASET ds = (SYNCDATASET) coord.getDatasets().getDatasetOrAsyncDataset().get(0);
+
+ final org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0);
+ assertEquals(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()), ds.getInitialInstance());
+ assertEquals(feed.getTimezone().getID(), ds.getTimezone());
+ assertEquals("${coord:" + feed.getFrequency().toString() + "}", ds.getFrequency());
+ assertEquals("", ds.getDoneFlag());
+ assertEquals(ds.getUriTemplate(),
+ FeedHelper.createStorage(feedCluster, feed).getUriTemplate(LocationType.DATA));
+
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+ assertEquals(props.get("mapred.job.priority"), "LOW");
+ Assert.assertEquals(props.get("logDir"), getLogPath(process));
+
+ assertLibExtensions(coord);
+ }
+
+ private String getLogPath(Process process) {
+ Path logPath = EntityUtil.getLogPath(cluster, process);
+ return (logPath.toUri().getScheme() == null ? "${nameNode}" : "") + logPath;
+ }
+
+ @Test
+ public void testBundle() throws Exception {
+ String path = StartupProperties.get().getProperty("system.lib.location");
+ if (!new File(path).exists()) {
+ Assert.assertTrue(new File(path).mkdirs());
+ }
+ Process process = ConfigurationStore.get().get(EntityType.PROCESS, "clicksummary");
+
+ WORKFLOWAPP parentWorkflow = initializeProcessMapper(process, "12", "360");
+ testParentWorkflow(process, parentWorkflow);
+ }
+
+ @Test
+ public void testBundle1() throws Exception {
+ Process process = ConfigurationStore.get().get(EntityType.PROCESS, "clicksummary");
+ process.setFrequency(Frequency.fromString("minutes(1)"));
+ process.setTimeout(Frequency.fromString("minutes(15)"));
+
+ WORKFLOWAPP parentWorkflow = initializeProcessMapper(process, "30", "15");
+ testParentWorkflow(process, parentWorkflow);
+ }
+
+ @Test
+ public void testPigProcessMapper() throws Exception {
+ Process process = ConfigurationStore.get().get(EntityType.PROCESS, "pig-process");
+ Assert.assertEquals("pig", process.getWorkflow().getEngine().value());
+
+ prepare(process);
+ WORKFLOWAPP parentWorkflow = initializeProcessMapper(process, "12", "360");
+ testParentWorkflow(process, parentWorkflow);
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+
+ ACTION pigActionNode = (ACTION) decisionOrForkOrJoin.get(3);
+ Assert.assertEquals("user-pig-job", pigActionNode.getName());
+
+ final PIG pigAction = pigActionNode.getPig();
+ Assert.assertEquals(pigAction.getScript(), "${nameNode}/falcon/staging/workflows/pig-process/user/id.pig");
+ Assert.assertNotNull(pigAction.getPrepare());
+ Assert.assertEquals(1, pigAction.getPrepare().getDelete().size());
+ Assert.assertFalse(pigAction.getParam().isEmpty());
+ Assert.assertEquals(5, pigAction.getParam().size());
+ Assert.assertEquals(Collections.EMPTY_LIST, pigAction.getArchive());
+ Assert.assertTrue(pigAction.getFile().size() > 0);
+
+ ACTION oozieAction = (ACTION) decisionOrForkOrJoin.get(5);
+ Assert.assertEquals("user-oozie-workflow", oozieAction.getName());
+ Assert.assertEquals("#USER_WF_PATH#", oozieAction.getSubWorkflow().getAppPath());
+ }
+
+ @DataProvider(name = "secureOptions")
+ private Object[][] createOptions() {
+ return new Object[][] {
+ {"simple"},
+ {"kerberos"},
+ };
+ }
+
+ @Test (dataProvider = "secureOptions")
+ public void testHiveProcessMapper(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ URL resource = this.getClass().getResource("/config/feed/hive-table-feed.xml");
+ Feed inFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.FEED, inFeed);
+
+ resource = this.getClass().getResource("/config/feed/hive-table-feed-out.xml");
+ Feed outFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.FEED, outFeed);
+
+ resource = this.getClass().getResource("/config/process/hive-process.xml");
+ Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.PROCESS, process);
+
+ prepare(process);
+ OozieEntityBuilder builder = OozieEntityBuilder.get(process);
+ Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ // verify table and hive props
+ Map<String, String> expected = getExpectedProperties(inFeed, outFeed, process);
+ expected.putAll(ClusterHelper.getHiveProperties(cluster));
+ for (Map.Entry<String, String> entry : props.entrySet()) {
+ if (expected.containsKey(entry.getKey())) {
+ Assert.assertEquals(entry.getValue(), expected.get(entry.getKey()));
+ }
+ }
+ Assert.assertEquals(props.get("logDir"), getLogPath(process));
+
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
+ testParentWorkflow(process, parentWorkflow);
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+
+ ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
+ Assert.assertEquals("user-hive-job", hiveNode.getName());
+
+ JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
+ org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
+
+ Assert.assertEquals(hiveAction.getScript(),
+ "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
+ Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
+ Assert.assertNull(hiveAction.getPrepare());
+ Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
+ Assert.assertFalse(hiveAction.getParam().isEmpty());
+ Assert.assertEquals(14, hiveAction.getParam().size());
+
+ Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
+ assertHCatCredentials(parentWorkflow, wfPath);
+
+ ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
+ }
+
+ @Test (dataProvider = "secureOptions")
+ public void testHiveProcessMapperWithFSInputFeedAndTableOutputFeed(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ URL resource = this.getClass().getResource("/config/feed/hive-table-feed-out.xml");
+ Feed outFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.FEED, outFeed);
+
+ resource = this.getClass().getResource("/config/process/hive-process-FSInputFeed.xml");
+ Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.PROCESS, process);
+
+ prepare(process);
+ OozieEntityBuilder builder = OozieEntityBuilder.get(process);
+ Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+ Assert.assertEquals(props.get("logDir"), getLogPath(process));
+
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
+ testParentWorkflow(process, parentWorkflow);
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+
+ ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
+ Assert.assertEquals("user-hive-job", hiveNode.getName());
+
+ JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
+ org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
+
+ Assert.assertEquals(hiveAction.getScript(),
+ "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
+ Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
+ Assert.assertNull(hiveAction.getPrepare());
+ Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
+ Assert.assertFalse(hiveAction.getParam().isEmpty());
+ Assert.assertEquals(10, hiveAction.getParam().size());
+
+ Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
+ assertHCatCredentials(parentWorkflow, wfPath);
+
+ ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
+ }
+
+ @Test (dataProvider = "secureOptions")
+ public void testHiveProcessMapperWithTableInputFeedAndFSOutputFeed(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ URL resource = this.getClass().getResource("/config/feed/hive-table-feed.xml");
+ Feed inFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.FEED, inFeed);
+
+ resource = this.getClass().getResource("/config/process/hive-process-FSOutputFeed.xml");
+ Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.PROCESS, process);
+
+ prepare(process);
+ OozieEntityBuilder builder = OozieEntityBuilder.get(process);
+ Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+ Assert.assertEquals(props.get("logDir"), getLogPath(process));
+
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
+ testParentWorkflow(process, parentWorkflow);
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+
+ ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
+ Assert.assertEquals("user-hive-job", hiveNode.getName());
+
+ JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
+ org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
+
+ Assert.assertEquals(hiveAction.getScript(),
+ "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
+ Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
+ Assert.assertNotNull(hiveAction.getPrepare());
+ Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
+ Assert.assertFalse(hiveAction.getParam().isEmpty());
+ Assert.assertEquals(6, hiveAction.getParam().size());
+
+ Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
+ assertHCatCredentials(parentWorkflow, wfPath);
+
+ ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
+ }
+
+ @Test (dataProvider = "secureOptions")
+ public void testHiveProcessWithNoInputsAndOutputs(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ URL resource = this.getClass().getResource("/config/process/dumb-hive-process.xml");
+ Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.PROCESS, process);
+
+ prepare(process);
+ OozieEntityBuilder builder = OozieEntityBuilder.get(process);
+ Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+ Assert.assertEquals(props.get("logDir"), getLogPath(process));
+
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
+ testParentWorkflow(process, parentWorkflow);
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+
+ ACTION hiveNode = (ACTION) decisionOrForkOrJoin.get(4);
+ Assert.assertEquals("user-hive-job", hiveNode.getName());
+
+ JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = OozieUtils.unMarshalHiveAction(hiveNode);
+ org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();
+
+ Assert.assertEquals(hiveAction.getScript(),
+ "${nameNode}/falcon/staging/workflows/hive-process/user/script.hql");
+ Assert.assertEquals(hiveAction.getJobXml(), "${wf:appPath()}/conf/hive-site.xml");
+ Assert.assertNull(hiveAction.getPrepare());
+ Assert.assertEquals(Collections.EMPTY_LIST, hiveAction.getArchive());
+ Assert.assertTrue(hiveAction.getParam().isEmpty());
+
+ ConfigurationStore.get().remove(EntityType.PROCESS, process.getName());
+ }
+
+ private void assertHCatCredentials(WORKFLOWAPP wf, String wfPath) throws IOException {
+ Path hiveConfPath = new Path(wfPath, "conf/hive-site.xml");
+ Assert.assertTrue(fs.exists(hiveConfPath));
+
+ if (SecurityUtil.isSecurityEnabled()) {
+ Assert.assertNotNull(wf.getCredentials());
+ Assert.assertEquals(1, wf.getCredentials().getCredential().size());
+ }
+
+ List<Object> actions = wf.getDecisionOrForkOrJoin();
+ for (Object obj : actions) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+
+ ACTION action = (ACTION) obj;
+
+ if (!SecurityUtil.isSecurityEnabled()) {
+ Assert.assertNull(action.getCred());
+ return;
+ }
+
+ String actionName = action.getName();
+ if ("user-hive-job".equals(actionName) || "user-pig-job".equals(actionName)
+ || "user-oozie-workflow".equals(actionName) || "recordsize".equals(actionName)) {
+ Assert.assertNotNull(action.getCred());
+ Assert.assertEquals(action.getCred(), "falconHiveAuth");
+ }
+ }
+ }
+
+ private void prepare(Process process) throws IOException {
+ Path wf = new Path(process.getWorkflow().getPath());
+ fs.mkdirs(wf.getParent());
+ fs.create(wf).close();
+ }
+
+ @Test (dataProvider = "secureOptions")
+ public void testProcessMapperForTableStorage(String secureOption) throws Exception {
+ StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);
+
+ URL resource = this.getClass().getResource("/config/feed/hive-table-feed.xml");
+ Feed inFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.FEED, inFeed);
+
+ resource = this.getClass().getResource("/config/feed/hive-table-feed-out.xml");
+ Feed outFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.FEED, outFeed);
+
+ resource = this.getClass().getResource("/config/process/pig-process-table.xml");
+ Process process = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.PROCESS, process);
+
+ OozieEntityBuilder builder = OozieEntityBuilder.get(process);
+ Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+
+ // verify table props
+ Map<String, String> expected = getExpectedProperties(inFeed, outFeed, process);
+ for (Map.Entry<String, String> entry : props.entrySet()) {
+ if (expected.containsKey(entry.getKey())) {
+ Assert.assertEquals(entry.getValue(), expected.get(entry.getKey()));
+ }
+ }
+ Assert.assertEquals(props.get("logDir"), getLogPath(process));
+
+ // verify the late data params
+ Assert.assertEquals(props.get("falconInputFeeds"), process.getInputs().getInputs().get(0).getFeed());
+ Assert.assertEquals(props.get("falconInPaths"), "${coord:dataIn('input')}");
+ Assert.assertEquals(props.get("falconInputFeedStorageTypes"), Storage.TYPE.TABLE.name());
+
+ // verify the post processing params
+ Assert.assertEquals(props.get("feedNames"), process.getOutputs().getOutputs().get(0).getFeed());
+ Assert.assertEquals(props.get("feedInstancePaths"), "${coord:dataOut('output')}");
+
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ WORKFLOWAPP parentWorkflow = getParentWorkflow(new Path(wfPath));
+
+ Assert.assertTrue(Storage.TYPE.TABLE == ProcessHelper.getStorageType(cluster, process));
+ assertHCatCredentials(parentWorkflow, wfPath);
+ }
+
+ private Map<String, String> getExpectedProperties(Feed inFeed, Feed outFeed,
+ Process process) throws FalconException {
+ Map<String, String> expected = new HashMap<String, String>();
+ for (Input input : process.getInputs().getInputs()) {
+ CatalogStorage storage = (CatalogStorage) FeedHelper.createStorage(cluster, inFeed);
+ propagateStorageProperties(input.getName(), storage, expected);
+ }
+
+ for (Output output : process.getOutputs().getOutputs()) {
+ CatalogStorage storage = (CatalogStorage) FeedHelper.createStorage(cluster, outFeed);
+ propagateStorageProperties(output.getName(), storage, expected);
+ }
+
+ return expected;
+ }
+
+ private void propagateStorageProperties(String feedName, CatalogStorage tableStorage,
+ Map<String, String> props) {
+ String prefix = "falcon_" + feedName;
+ props.put(prefix + "_storage_type", tableStorage.getType().name());
+ props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
+ props.put(prefix + "_database", tableStorage.getDatabase());
+ props.put(prefix + "_table", tableStorage.getTable());
+
+ if (prefix.equals("falcon_input")) {
+ props.put(prefix + "_partition_filter_pig", "${coord:dataInPartitionFilter('input', 'pig')}");
+ props.put(prefix + "_partition_filter_hive", "${coord:dataInPartitionFilter('input', 'hive')}");
+ props.put(prefix + "_partition_filter_java", "${coord:dataInPartitionFilter('input', 'java')}");
+ props.put(prefix + "_datain_partitions_hive", "${coord:dataInPartitions('input', 'hive-export')}");
+ } else if (prefix.equals("falcon_output")) {
+ props.put(prefix + "_dataout_partitions", "${coord:dataOutPartitions('output')}");
+ }
+ }
+
+ @Test
+ public void testProcessWorkflowMapper() throws Exception {
+ Process process = ConfigurationStore.get().get(EntityType.PROCESS, "clicksummary");
+ Workflow processWorkflow = process.getWorkflow();
+ Assert.assertEquals("test", processWorkflow.getName());
+ Assert.assertEquals("1.0.0", processWorkflow.getVersion());
+ }
+
+ @SuppressWarnings("unchecked")
+ private void assertLibExtensions(COORDINATORAPP coord) throws Exception {
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ JAXBContext jaxbContext = JAXBContext.newInstance(WORKFLOWAPP.class);
+ WORKFLOWAPP wf = ((JAXBElement<WORKFLOWAPP>) jaxbContext.createUnmarshaller().unmarshal(
+ fs.open(new Path(wfPath, "workflow.xml")))).getValue();
+ List<Object> actions = wf.getDecisionOrForkOrJoin();
+ for (Object obj : actions) {
+ if (!(obj instanceof ACTION)) {
+ continue;
+ }
+ ACTION action = (ACTION) obj;
+ List<String> files = null;
+ if (action.getJava() != null) {
+ files = action.getJava().getFile();
+ } else if (action.getPig() != null) {
+ files = action.getPig().getFile();
+ } else if (action.getMapReduce() != null) {
+ files = action.getMapReduce().getFile();
+ }
+ if (files != null) {
+ Assert.assertTrue(files.get(files.size() - 1)
+ .endsWith("/projects/falcon/working/libext/PROCESS/ext.jar"));
+ }
+ }
+ }
+
+ private WORKFLOWAPP initializeProcessMapper(Process process, String throttle, String timeout)
+ throws Exception {
+ OozieEntityBuilder builder = OozieEntityBuilder.get(process);
+ Path bundlePath = new Path("/falcon/staging/workflows", process.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(process).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ testDefCoordMap(process, coord);
+ assertEquals(coord.getControls().getThrottle(), throttle);
+ assertEquals(coord.getControls().getTimeout(), timeout);
+
+ String wfPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
+ return getParentWorkflow(new Path(wfPath));
+ }
+
+ public void testParentWorkflow(Process process, WORKFLOWAPP parentWorkflow) {
+ Assert.assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString(), parentWorkflow.getName());
+
+ List<Object> decisionOrForkOrJoin = parentWorkflow.getDecisionOrForkOrJoin();
+ Assert.assertEquals("should-record", ((DECISION) decisionOrForkOrJoin.get(0)).getName());
+ Assert.assertEquals("recordsize", ((ACTION) decisionOrForkOrJoin.get(1)).getName());
+ Assert.assertEquals("user-workflow", ((DECISION) decisionOrForkOrJoin.get(2)).getName());
+ Assert.assertEquals("user-pig-job", ((ACTION) decisionOrForkOrJoin.get(3)).getName());
+ Assert.assertEquals("user-hive-job", ((ACTION) decisionOrForkOrJoin.get(4)).getName());
+ Assert.assertEquals("user-oozie-workflow", ((ACTION) decisionOrForkOrJoin.get(5)).getName());
+ Assert.assertEquals("succeeded-post-processing", ((ACTION) decisionOrForkOrJoin.get(6)).getName());
+ Assert.assertEquals("failed-post-processing", ((ACTION) decisionOrForkOrJoin.get(7)).getName());
+ Assert.assertEquals("3", ((ACTION) decisionOrForkOrJoin.get(1)).getRetryMax());
+ Assert.assertEquals("1", ((ACTION) decisionOrForkOrJoin.get(1)).getRetryInterval());
+ Assert.assertEquals("3", ((ACTION) decisionOrForkOrJoin.get(6)).getRetryMax());
+ Assert.assertEquals("1", ((ACTION) decisionOrForkOrJoin.get(6)).getRetryInterval());
+ Assert.assertEquals("3", ((ACTION) decisionOrForkOrJoin.get(7)).getRetryMax());
+ Assert.assertEquals("1", ((ACTION) decisionOrForkOrJoin.get(7)).getRetryInterval());
+ }
+
+ @SuppressWarnings("unchecked")
+ private WORKFLOWAPP getParentWorkflow(Path path) throws Exception {
+ String workflow = readFile(fs, new Path(path, "workflow.xml"));
+
+ JAXBContext wfAppContext = JAXBContext.newInstance(WORKFLOWAPP.class);
+ Unmarshaller unmarshaller = wfAppContext.createUnmarshaller();
+ return ((JAXBElement<WORKFLOWAPP>) unmarshaller.unmarshal(
+ new StreamSource(new ByteArrayInputStream(workflow.trim().getBytes())))).getValue();
+ }
+
+ @AfterMethod
+ public void cleanup() throws Exception {
+ cleanupStore();
+ }
+
+ @Test
+ public void testProcessWithNoInputsAndOutputs() throws Exception {
+ ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(hdfsUrl);
+
+ URL resource = this.getClass().getResource("/config/process/dumb-process.xml");
+ Process processEntity = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(resource);
+ ConfigurationStore.get().publish(EntityType.PROCESS, processEntity);
+
+ OozieEntityBuilder builder = OozieEntityBuilder.get(processEntity);
+ Path bundlePath = new Path("/falcon/staging/workflows", processEntity.getName());
+ builder.build(cluster, bundlePath);
+ assertTrue(fs.exists(bundlePath));
+
+ BUNDLEAPP bundle = getBundle(fs, bundlePath);
+ assertEquals(EntityUtil.getWorkflowName(processEntity).toString(), bundle.getName());
+ assertEquals(1, bundle.getCoordinator().size());
+ assertEquals(EntityUtil.getWorkflowName(Tag.DEFAULT, processEntity).toString(),
+ bundle.getCoordinator().get(0).getName());
+ String coordPath = bundle.getCoordinator().get(0).getAppPath().replace("${nameNode}", "");
+
+ COORDINATORAPP coord = getCoordinator(fs, new Path(coordPath));
+ HashMap<String, String> props = new HashMap<String, String>();
+ for (Property prop : coord.getAction().getWorkflow().getConfiguration().getProperty()) {
+ props.put(prop.getName(), prop.getValue());
+ }
+ Assert.assertEquals(props.get("logDir"), getLogPath(processEntity));
+
+ String[] expected = {
+ EntityInstanceMessage.ARG.feedNames.getPropName(),
+ EntityInstanceMessage.ARG.feedInstancePaths.getPropName(),
+ "falconInputFeeds",
+ "falconInPaths",
+ "userWorkflowName",
+ "userWorkflowVersion",
+ "userWorkflowEngine",
+ };
+
+ for (String property : expected) {
+ Assert.assertTrue(props.containsKey(property), "expected property missing: " + property);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/cluster/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/cluster/cluster-0.1.xml b/oozie/src/test/resources/config/cluster/cluster-0.1.xml
new file mode 100644
index 0000000..032cc77
--- /dev/null
+++ b/oozie/src/test/resources/config/cluster/cluster-0.1.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<cluster colo="gs" description="" name="corp" xmlns="uri:falcon:cluster:0.1"
+ >
+ <interfaces>
+ <interface type="readonly" endpoint="hftp://localhost:50010"
+ version="0.20.2"/>
+ <interface type="write" endpoint="hdfs://localhost:8020"
+ version="0.20.2"/>
+ <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/"
+ version="3.1"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
+ version="5.1.6"/>
+ <interface type="registry" endpoint="Hcat" version="1"/>
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/falcon/staging"/>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/falcon/working"/>
+ </locations>
+ <properties>
+ <property name="field1" value="value1"/>
+ <property name="field2" value="value2"/>
+ <property name="hive.metastore.client.socket.timeout" value="20"/>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/feed/feed-0.1.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/feed/feed-0.1.xml b/oozie/src/test/resources/config/feed/feed-0.1.xml
new file mode 100644
index 0000000..fb9b707
--- /dev/null
+++ b/oozie/src/test/resources/config/feed/feed-0.1.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<feed description="clicks log" name="clicks" xmlns="uri:falcon:feed:0.1"
+ >
+ <partitions>
+ <partition name="fraud"/>
+ <partition name="country"/>
+ </partitions>
+
+ <groups>online,bi</groups>
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)"/>
+
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="hours(6)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ <locations>
+ <location type="data" path="/projects/falcon/clicks/${YY}/${MM}"/>
+ <location type="stats" path="/projects/falcon/clicksStats"/>
+ <location type="meta" path="/projects/falcon/clicksMetaData"/>
+ </locations>
+ </cluster>
+ <cluster name="backupCluster" type="target">
+ <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="hours(6)" action="archive"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/projects/falcon/clicks"/>
+ <location type="stats" path="/projects/falcon/clicksStats"/>
+ <location type="meta" path="/projects/falcon/clicksMetaData"/>
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+
+ <properties>
+ <property name="field1" value="value1"/>
+ <property name="field2" value="value2"/>
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/feed/hive-table-feed-out.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/feed/hive-table-feed-out.xml b/oozie/src/test/resources/config/feed/hive-table-feed-out.xml
new file mode 100644
index 0000000..bd93a01
--- /dev/null
+++ b/oozie/src/test/resources/config/feed/hive-table-feed-out.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks summary table " name="clicks-summary-table" xmlns="uri:falcon:feed:0.1">
+ <groups>online,bi</groups>
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)"/>
+
+ <clusters>
+ <cluster name="corp" type="source" partition="*/${cluster.colo}">
+ <validity start="2021-11-01T00:00Z" end="2021-12-31T00:00Z"/>
+ <retention limit="hours(48)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ <cluster name="backupCluster" type="target">
+ <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
+ <retention limit="hours(6)" action="archive"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <table uri="catalog:default:clicks-summary#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/feed/hive-table-feed.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/feed/hive-table-feed.xml b/oozie/src/test/resources/config/feed/hive-table-feed.xml
new file mode 100644
index 0000000..66d0742
--- /dev/null
+++ b/oozie/src/test/resources/config/feed/hive-table-feed.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log table " name="clicks-raw-table" xmlns="uri:falcon:feed:0.1">
+ <groups>online,bi</groups>
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)"/>
+
+ <clusters>
+ <cluster name="corp" type="source" partition="*/${cluster.colo}">
+ <validity start="2021-11-01T00:00Z" end="2021-12-31T00:00Z"/>
+ <retention limit="hours(48)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ <cluster name="backupCluster" type="target">
+ <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
+ <retention limit="hours(6)" action="archive"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <table uri="catalog:default:clicks#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/late/late-cluster.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/late/late-cluster.xml b/oozie/src/test/resources/config/late/late-cluster.xml
new file mode 100644
index 0000000..ac0817f
--- /dev/null
+++ b/oozie/src/test/resources/config/late/late-cluster.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<cluster colo="gs" description="" name="late-cluster" xmlns="uri:falcon:cluster:0.1"
+ >
+ <interfaces>
+ <interface type="readonly" endpoint="hftp://localhost:50010"
+ version="0.20.2"/>
+ <interface type="write" endpoint="hdfs://localhost:8020"
+ version="0.20.2"/>
+ <interface type="execute" endpoint="localhost:8021" version="0.20.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/"
+ version="3.1"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true"
+ version="5.1.6"/>
+ <interface type="registry" endpoint="Hcat" version="1"/>
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/falcon/staging"/>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/falcon/working"/>
+ </locations>
+ <properties>
+ <property name="field1" value="value1"/>
+ <property name="field2" value="value2"/>
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/late/late-feed1.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/late/late-feed1.xml b/oozie/src/test/resources/config/late/late-feed1.xml
new file mode 100644
index 0000000..c500c4c
--- /dev/null
+++ b/oozie/src/test/resources/config/late/late-feed1.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<feed description="clicks log" name="late-feed1" xmlns="uri:falcon:feed:0.1"
+ >
+ <partitions>
+ <partition name="fraud"/>
+ <partition name="country"/>
+ </partitions>
+
+ <groups>online,bi</groups>
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="minutes(5)"/>
+
+ <clusters>
+ <cluster name="late-cluster" type="source">
+ <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="hours(6)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/projects/falcon/clicks"/>
+ <location type="stats" path="/projects/falcon/clicksStats"/>
+ <location type="meta" path="/projects/falcon/clicksMetaData"/>
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+
+ <properties>
+ <property name="field1" value="value1"/>
+ <property name="field2" value="value2"/>
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/late/late-feed2.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/late/late-feed2.xml b/oozie/src/test/resources/config/late/late-feed2.xml
new file mode 100644
index 0000000..6ccffe2
--- /dev/null
+++ b/oozie/src/test/resources/config/late/late-feed2.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<feed description="clicks log" name="late-feed2" xmlns="uri:falcon:feed:0.1"
+ >
+ <partitions>
+ <partition name="fraud"/>
+ <partition name="country"/>
+ </partitions>
+
+ <groups>online,bi</groups>
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="minutes(5)"/>
+
+ <clusters>
+ <cluster name="late-cluster" type="source">
+ <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="hours(6)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/projects/falcon/clicks"/>
+ <location type="stats" path="/projects/falcon/clicksStats"/>
+ <location type="meta" path="/projects/falcon/clicksMetaData"/>
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+
+ <properties>
+ <property name="field1" value="value1"/>
+ <property name="field2" value="value2"/>
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/late/late-feed3.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/late/late-feed3.xml b/oozie/src/test/resources/config/late/late-feed3.xml
new file mode 100644
index 0000000..239f140
--- /dev/null
+++ b/oozie/src/test/resources/config/late/late-feed3.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<feed description="clicks log" name="late-feed3" xmlns="uri:falcon:feed:0.1"
+ >
+ <partitions>
+ <partition name="fraud"/>
+ <partition name="country"/>
+ </partitions>
+
+ <groups>online,bi</groups>
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)"/>
+
+ <clusters>
+ <cluster name="late-cluster" type="source">
+ <validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="hours(6)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/projects/falcon/clicks"/>
+ <location type="stats" path="/projects/falcon/clicksStats"/>
+ <location type="meta" path="/projects/falcon/clicksMetaData"/>
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+
+ <properties>
+ <property name="field1" value="value1"/>
+ <property name="field2" value="value2"/>
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/late/late-process1.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/late/late-process1.xml b/oozie/src/test/resources/config/late/late-process1.xml
new file mode 100644
index 0000000..aba5525
--- /dev/null
+++ b/oozie/src/test/resources/config/late/late-process1.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- ~ Licensed to the Apache Software Foundation (ASF) under one ~ or more contributor license agreements. See the NOTICE file ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file ~ to you under the Apache License, Version 2.0 (the ~ "License"); you may not use this file except in compliance ~ with the
+ License. You may obtain a copy of the License at ~ ~ http://www.apache.org/licenses/LICENSE-2.0 ~ ~ Unless required by applicable law or agreed to in writing, software ~ distributed
+ under the License is distributed on an "AS IS" BASIS, ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ~ See the License for the specific language governing
+ permissions and ~ limitations under the License. -->
+<process name="late-process1" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="late-cluster">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <concurrency>1</concurrency>
+ <execution>LIFO</execution>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="impression" feed="late-feed1" start-instance="today(0,0)" end-instance="today(0,2)"/>
+ <input name="clicks" feed="late-feed2" start-instance="yesterday(0,0)" end-instance="today(0,0)"
+ partition="*/US"/>
+ </inputs>
+
+ <outputs>
+ <output name="clicksummary" feed="late-feed3" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <properties>
+ <property name="procprop" value="procprop"/>
+ </properties>
+
+ <workflow engine="oozie" path="/user/guest/workflow"/>
+
+ <retry policy="periodic" delay="hours(10)" attempts="3"/>
+
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/late/late-process2.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/late/late-process2.xml b/oozie/src/test/resources/config/late/late-process2.xml
new file mode 100644
index 0000000..bc507ad
--- /dev/null
+++ b/oozie/src/test/resources/config/late/late-process2.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<process name="late-process2" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="late-cluster">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <concurrency>1</concurrency>
+ <execution>LIFO</execution>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="impression" feed="late-feed1" start-instance="today(0,0)" end-instance="today(0,2)"/>
+ <input name="clicks" feed="late-feed2" start-instance="yesterday(0,0)" end-instance="today(0,0)"
+ partition="*/US"/>
+ </inputs>
+
+ <outputs>
+ <output name="clicksummary" feed="late-feed3" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <properties>
+ <property name="procprop" value="procprop"/>
+ </properties>
+
+ <workflow engine="oozie" path="/user/guest/workflow"/>
+
+ <retry policy="periodic" delay="hours(10)" attempts="3"/>
+
+ <late-process policy="exp-backoff" delay="hours(1)">
+ <late-input feed="impression" workflow-path="hdfs://impression/late/workflow"/>
+ <late-input feed="clicks" workflow-path="hdfs://clicks/late/workflow"/>
+ </late-process>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/dumb-hive-process.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/dumb-hive-process.xml b/oozie/src/test/resources/config/process/dumb-hive-process.xml
new file mode 100644
index 0000000..c504074
--- /dev/null
+++ b/oozie/src/test/resources/config/process/dumb-hive-process.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<process name="hive-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what = none -->
+
+ <!-- how -->
+ <workflow engine="hive" path="/apps/hive/script.hql"/>
+
+ <retry policy="periodic" delay="minutes(10)" attempts="3"/>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/dumb-process.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/dumb-process.xml b/oozie/src/test/resources/config/process/dumb-process.xml
new file mode 100644
index 0000000..b71f089
--- /dev/null
+++ b/oozie/src/test/resources/config/process/dumb-process.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<process name="dumb-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what = none -->
+
+ <!-- how -->
+ <workflow engine="pig" path="/apps/pig/id.pig"/>
+
+ <retry policy="periodic" delay="hours(10)" attempts="3"/>
+
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/hive-process-FSInputFeed.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/hive-process-FSInputFeed.xml b/oozie/src/test/resources/config/process/hive-process-FSInputFeed.xml
new file mode 100644
index 0000000..d871377
--- /dev/null
+++ b/oozie/src/test/resources/config/process/hive-process-FSInputFeed.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<process name="hive-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="input" feed="clicks" start="yesterday(0,0)" end="yesterday(20,0)"/>
+ </inputs>
+
+ <outputs>
+ <output name="output" feed="clicks-summary-table" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <workflow engine="hive" path="/apps/hive/script.hql"/>
+
+ <retry policy="periodic" delay="minutes(10)" attempts="3"/>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/hive-process-FSOutputFeed.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/hive-process-FSOutputFeed.xml b/oozie/src/test/resources/config/process/hive-process-FSOutputFeed.xml
new file mode 100644
index 0000000..23d96c3
--- /dev/null
+++ b/oozie/src/test/resources/config/process/hive-process-FSOutputFeed.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<process name="hive-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="input" feed="clicks-raw-table" start="yesterday(0,0)" end="yesterday(20,0)"/>
+ </inputs>
+
+ <outputs>
+ <output name="output" feed="clicks" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <workflow engine="hive" path="/apps/hive/script.hql"/>
+
+ <retry policy="periodic" delay="minutes(10)" attempts="3"/>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/hive-process.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/hive-process.xml b/oozie/src/test/resources/config/process/hive-process.xml
new file mode 100644
index 0000000..4dac8e9
--- /dev/null
+++ b/oozie/src/test/resources/config/process/hive-process.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<process name="hive-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="input" feed="clicks-raw-table" start="yesterday(0,0)" end="yesterday(20,0)"/>
+ </inputs>
+
+ <outputs>
+ <output name="output" feed="clicks-summary-table" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <workflow engine="hive" path="/apps/hive/script.hql"/>
+
+ <retry policy="periodic" delay="minutes(10)" attempts="3"/>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/pig-process-0.1.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/pig-process-0.1.xml b/oozie/src/test/resources/config/process/pig-process-0.1.xml
new file mode 100644
index 0000000..318f0da
--- /dev/null
+++ b/oozie/src/test/resources/config/process/pig-process-0.1.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<process name="pig-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="impression" feed="impressions" start="today(0,0)" end="today(0,2)"/>
+ <input name="click" feed="clicks" start="yesterday(0,0)" end="latest(0)" partition="*/US"/>
+ </inputs>
+
+ <outputs>
+ <output name="clicksummary" feed="impressions" instance="today(0,0)"/>
+ </outputs>
+
+ <properties>
+ <property name="procprop" value="procprop"/>
+ <property name="mapred.job.priority" value="LOW"/>
+ </properties>
+
+ <!-- how -->
+ <workflow engine="pig" path="/apps/pig/id.pig"/>
+
+ <retry policy="periodic" delay="hours(10)" attempts="3"/>
+
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/pig-process-table.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/pig-process-table.xml b/oozie/src/test/resources/config/process/pig-process-table.xml
new file mode 100644
index 0000000..37aca10
--- /dev/null
+++ b/oozie/src/test/resources/config/process/pig-process-table.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<process name="table-process" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="input" feed="clicks-raw-table" start="yesterday(0,0)" end="yesterday(20,0)"/>
+ </inputs>
+
+ <outputs>
+ <output name="output" feed="clicks-summary-table" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <workflow engine="pig" path="/apps/pig/id.pig"/>
+
+ <retry policy="periodic" delay="minutes(10)" attempts="3"/>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/config/process/process-0.1.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/config/process/process-0.1.xml b/oozie/src/test/resources/config/process/process-0.1.xml
new file mode 100644
index 0000000..6148441
--- /dev/null
+++ b/oozie/src/test/resources/config/process/process-0.1.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- ~ Licensed to the Apache Software Foundation (ASF) under one ~ or more contributor license agreements. See the NOTICE file ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file ~ to you under the Apache License, Version 2.0 (the ~ "License"); you may not use this file except in compliance ~ with the
+ License. You may obtain a copy of the License at ~ ~ http://www.apache.org/licenses/LICENSE-2.0 ~ ~ Unless required by applicable law or agreed to in writing, software ~ distributed
+ under the License is distributed on an "AS IS" BASIS, ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ~ See the License for the specific language governing
+ permissions and ~ limitations under the License. -->
+<process name="sample" xmlns="uri:falcon:process:0.1">
+ <!-- where -->
+ <clusters>
+ <cluster name="corp">
+ <validity start="2011-11-02T00:00Z" end="2011-12-30T00:00Z"/>
+ </cluster>
+ </clusters>
+
+ <!-- when -->
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <!-- what -->
+ <inputs>
+ <input name="impression" feed="impressions" start="today(0,0)" end="today(0,2)"/>
+ <input name="click" feed="clicks" start="yesterday(0,0)" end="latest(0)" partition="*/US"/>
+ </inputs>
+
+ <outputs>
+ <output name="clicksummary" feed="impressions" instance="today(0,0)"/>
+ </outputs>
+
+ <!-- how -->
+ <properties>
+ <property name="procprop" value="procprop"/>
+ <property name="mapred.job.priority" value="LOW"/>
+ </properties>
+
+ <workflow name="test" version="1.0.0" engine="oozie" path="/user/guest/workflow"/>
+
+ <retry policy="periodic" delay="hours(10)" attempts="3"/>
+
+ <late-process policy="exp-backoff" delay="hours(1)">
+ <late-input input="impression" workflow-path="hdfs://impression/late/workflow"/>
+ <late-input input="click" workflow-path="hdfs://clicks/late/workflow"/>
+ </late-process>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/feed.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/feed.xml b/oozie/src/test/resources/feed/feed.xml
new file mode 100644
index 0000000..4da222e
--- /dev/null
+++ b/oozie/src/test/resources/feed/feed.xml
@@ -0,0 +1,56 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log" name="raw-logs" xmlns="uri:falcon:feed:0.1"
+ >
+
+ <groups>online,bi</groups>
+
+ <frequency>minutes(20)</frequency>
+ <timezone>UTC</timezone>
+
+ <late-arrival cut-off="minutes(3)"/>
+ <clusters>
+ <cluster name="corp1" type="source" delay="minutes(40)">
+ <validity start="2010-01-01T00:00Z" end="2020-01-01T02:00Z"/>
+ <retention limit="minutes(5)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ <cluster name="corp2" type="target">
+ <validity start="2010-01-01T00:00Z" end="2020-01-01T02:00Z"/>
+ <retention limit="minutes(7)" action="delete"/>
+ <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data"
+ path="/examples/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}"/>
+ <location type="stats" path="/projects/falcon/clicksStats"/>
+ <location type="meta" path="/projects/falcon/clicksMetaData"/>
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+
+ <properties>
+ <property name="field3" value="value3"/>
+ <property name="field2" value="value2"/>
+
+ <property name="field4" value="value2"/>
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/185b5888/oozie/src/test/resources/feed/fs-replication-feed.xml
----------------------------------------------------------------------
diff --git a/oozie/src/test/resources/feed/fs-replication-feed.xml b/oozie/src/test/resources/feed/fs-replication-feed.xml
new file mode 100644
index 0000000..bada507
--- /dev/null
+++ b/oozie/src/test/resources/feed/fs-replication-feed.xml
@@ -0,0 +1,68 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="billing RC File" name="replication-test" xmlns="uri:falcon:feed:0.1">
+ <partitions>
+ <partition name="colo"/>
+ <partition name="eventTime"/>
+ <partition name="impressionHour"/>
+ <partition name="pricingModel"/>
+ </partitions>
+
+ <groups>online,bi</groups>
+
+ <frequency>minutes(5)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="minutes(1)"/>
+
+ <clusters>
+ <cluster partition="${cluster.colo}" type="source" name="corp1">
+ <validity end="2099-01-01T00:00Z" start="2012-10-01T12:00Z"/>
+ <retention action="delete" limit="days(10000)"/>
+ </cluster>
+ <cluster type="target" name="alpha">
+ <validity end="2012-10-01T12:11Z" start="2012-10-01T12:05Z"/>
+ <retention action="delete" limit="days(10000)"/>
+ <locations>
+ <location path="/localDC/rc/billing/ua1/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/" type="data"/>
+ </locations>
+ </cluster>
+ <cluster type="target" name="beta">
+ <validity end="2012-10-01T12:26Z" start="2012-10-01T12:10Z"/>
+ <retention action="delete" limit="days(10000)"/>
+ <locations>
+ <location path="/localDC/rc/billing/ua2/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/" type="data"/>
+ </locations>
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location
+ path="/localDC/rc/billing/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/"
+ type="data"/>
+ <location path="/data/regression/fetlrc/billing/stats" type="stats"/>
+ <location path="/data/regression/fetlrc/billing/metadata"
+ type="meta"/>
+ </locations>
+
+ <ACL permission="0x755" group="group" owner="fetl"/>
+ <schema provider="protobuf" location="/databus/streams_local/click_rr/schema/"/>
+ <properties>
+ <property name="maxMaps" value="33" />
+ <property name="mapBandwidthKB" value="2048" />
+ </properties>
+</feed>