You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by sa...@apache.org on 2014/08/04 12:04:02 UTC
[03/27] adding falcon-regression
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log4j.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log4j.xml b/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log4j.xml
new file mode 100644
index 0000000..045002c
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log4j.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+
+<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
+ <appender name="console" class="org.apache.log4j.ConsoleAppender">
+ <param name="Target" value="System.out"/>
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%d %-5p - %m (%c{1}:%L)%n"/>
+ </layout>
+ </appender>
+
+ <appender name="FILE" class="org.apache.log4j.DailyRollingFileAppender">
+ <param name="File" value="/var/log/ivory/application.log"/>
+ <param name="Append" value="true"/>
+ <param name="Threshold" value="debug"/>
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%d %-5p - %m (%c{1}:%L)%n"/>
+ </layout>
+ </appender>
+
+ <appender name="AUDIT" class="org.apache.log4j.DailyRollingFileAppender">
+ <param name="File" value="/var/log/ivory/audit.log"/>
+ <param name="Append" value="true"/>
+ <param name="Threshold" value="debug"/>
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%d %-5p - %m%n"/>
+ </layout>
+ </appender>
+
+ <logger name="org.apache.ivory" additivity="false">
+ <level value="debug"/>
+ <appender-ref ref="console" />
+ </logger>
+
+ <logger name="AUDIT">
+ <level value="info"/>
+ <appender-ref ref="AUDIT" />
+ </logger>
+
+ <root>
+ <priority value ="info" />
+ <appender-ref ref="console" />
+ </root>
+
+</log4j:configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log_01.txt
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log_01.txt b/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log_01.txt
new file mode 100644
index 0000000..79b343f
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/OozieExampleInputData/normalInput/log_01.txt
@@ -0,0 +1,174 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+package com.inmobi.qa.airavatqa.prism;
+
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import com.inmobi.qa.airavatqa.core.Bundle;
+import com.inmobi.qa.airavatqa.core.ColoHelper;
+import com.inmobi.qa.airavatqa.core.PrismHelper;
+import com.inmobi.qa.airavatqa.core.ServiceResponse;
+import com.inmobi.qa.airavatqa.core.Util;
+import com.inmobi.qa.airavatqa.core.Util.URLS;
+import com.inmobi.qa.airavatqa.core.instanceUtil;
+import com.inmobi.qa.airavatqa.core.xmlUtil;
+import com.inmobi.qa.airavatqa.generated.feed.ActionType;
+import com.inmobi.qa.airavatqa.generated.feed.ClusterType;
+import com.inmobi.qa.airavatqa.generated.feed.TimezoneType;
+
+public class PrismFeedLateReplicationTest {
+ // Replication scenario driven through prism: ua2 and ua3 are feed sources, ua1 is the target.
+ // Runs before every test method: prints the test name, then restarts the service on all three colos.
+ @BeforeMethod(alwaysRun=true)
+ public void testName(Method method) throws Exception
+ {
+ Util.print("test name: "+method.getName());
+ //restart server as precaution
+ Util.restartService(ua1.getClusterHelper());
+ Util.restartService(ua2.getClusterHelper());
+ Util.restartService(ua3.getClusterHelper());
+
+
+ }
+
+ public PrismFeedLateReplicationTest() throws Exception{
+ // Empty body. NOTE(review): the throws clause is presumably needed because the field initializers below can throw - confirm.
+ }
+ // Helpers are built from the property files named here; those files are not part of this class.
+ PrismHelper prismHelper=new PrismHelper("prism.properties");
+ // ua1: target colo of the replication (its bundle b1 is added as ClusterType.TARGET in the test).
+ ColoHelper ua1=new ColoHelper("mk-qa.config.properties");
+ // ua2: first source colo (bundle b2, partition US/${cluster.colo}).
+ ColoHelper ua2 = new ColoHelper("ivoryqa-1.config.properties");
+ // ua3: second source colo (bundle b3, partition UK/${cluster.colo}).
+ ColoHelper ua3 = new ColoHelper("gs1001.config.properties");
+ // Submits three clusters and one feed with two sources and one target, waits for replication, then puts data into the instances' input folders.
+ @SuppressWarnings("deprecation")
+ @Test
+ public void multipleSourceOneTarget() throws Exception
+ {
+ // Three separately read copies of the first EL bundle; generateUniqueBundle() is called on each.
+ Bundle b1 = (Bundle)Util.readELBundles()[0][0];
+ b1.generateUniqueBundle();
+ Bundle b2 = (Bundle)Util.readELBundles()[0][0];
+ b2.generateUniqueBundle();
+ Bundle b3 = (Bundle)Util.readELBundles()[0][0];
+ b3.generateUniqueBundle();
+
+ try{
+ b1 = new Bundle(b1,ua1.getEnvFileName());
+ b2 = new Bundle(b2,ua2.getEnvFileName());
+ b3 = new Bundle(b3,ua3.getEnvFileName());
+ // Each bundle is now re-created against the environment file of its colo.
+
+ b1.setInputFeedDataPath("/samarthRetention/input-data/rawLogs/oozieExample/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/");
+ // Submit one cluster entity per colo through prism; each response message must contain SUCCEEDED.
+ b1.setCLusterColo("ua1");
+ Util.print("cluster b1: "+b1.getClusters().get(0));
+
+ ServiceResponse r = prismHelper.getClusterHelper().submitEntity(URLS.SUBMIT_URL,b1.getClusters().get(0));
+ Assert.assertTrue(r.getMessage().contains("SUCCEEDED"));
+
+
+ b2.setCLusterColo("ua2");
+ Util.print("cluster b2: "+b2.getClusters().get(0));
+ r = prismHelper.getClusterHelper().submitEntity(URLS.SUBMIT_URL,b2.getClusters().get(0));
+ Assert.assertTrue(r.getMessage().contains("SUCCEEDED"));
+
+
+ b3.setCLusterColo("ua3");
+ Util.print("cluster b3: "+b3.getClusters().get(0));
+ r = prismHelper.getClusterHelper().submitEntity(URLS.SUBMIT_URL,b3.getClusters().get(0));
+ Assert.assertTrue(r.getMessage().contains("SUCCEEDED"));
+ // The feed definition starts from b1's first dataset.
+ // NOTE(review): the call below passes a null cluster name; presumably it resets the feed's cluster list - confirm in instanceUtil.
+ String feed = b1.getDataSets().get(0);
+ feed = instanceUtil.setFeedCluster(feed,xmlUtil.createValidity("2009-02-01T00:00Z","2012-01-01T00:00Z",TimezoneType.UTC),xmlUtil.createRtention("hours(10)",ActionType.DELETE),null,ClusterType.SOURCE,null);
+ // Clean the feed data prefix on ua2, then replenish data there with the /US/ua2 postfix.
+ String postFix = "/US/ua2" ;
+ String prefix = b1.getFeedDataPathPrefix();
+ Util.HDFSCleanup(ua2,prefix.substring(1));
+ Util.lateDataReplenish(ua2,90,0,1,prefix,postFix);
+
+ // Same preparation on ua3 with the /UK/ua3 postfix.
+ postFix = "/UK/ua3" ;
+ prefix = b1.getFeedDataPathPrefix();
+ Util.HDFSCleanup(ua3,prefix.substring(1));
+ Util.lateDataReplenish(ua3,90,0,1,prefix,postFix);
+ // NOTE(review): -30 is presumably an offset in minutes relative to the current time - confirm in instanceUtil.
+ String startTime = instanceUtil.getTimeWrtSystemTime(-30);
+ // Add b2's and b3's clusters as sources (US/UK partitions) and b1's cluster as target, valid from startTime to 2099.
+ feed = instanceUtil.setFeedCluster(feed,xmlUtil.createValidity(startTime,"2099-01-01T00:00Z",TimezoneType.UTC),xmlUtil.createRtention("hours(10)",ActionType.DELETE),Util.readClusterName(b2.getClusters().get(0)),ClusterType.SOURCE,"US/${cluster.colo}");
+ feed = instanceUtil.setFeedCluster(feed,xmlUtil.createValidity(startTime,"2099-01-01T00:00Z",TimezoneType.UTC),xmlUtil.createRtention("hours(10)",ActionType.DELETE),Util.readClusterName(b1.getClusters().get(0)),ClusterType.TARGET,null);
+ feed = instanceUtil.setFeedCluster(feed,xmlUtil.createValidity(startTime,"2099-01-01T00:00Z",TimezoneType.UTC),xmlUtil.createRtention("hours(10)",ActionType.DELETE),Util.readClusterName(b3.getClusters().get(0)),ClusterType.SOURCE,"UK/${cluster.colo}");
+
+
+ Util.print("feed: "+feed);
+ // NOTE(review): the response r of the submitAndSchedule call below is never asserted.
+ r= prismHelper.getFeedHelper().submitAndSchedule(URLS.SUBMIT_AND_SCHEDULE_URL, feed);
+ Thread.sleep(10000);
+ // After the fixed 10 s pause, look up the latest FEED bundle id for this feed on ua1.
+ String TargetBundleID = instanceUtil.getLatestBundleID(Util.readDatasetName(feed),"FEED",ua1.getFeedHelper());
+
+ //wait till 1st instance of replication coord is SUCCEEDED
+ ArrayList<String> replicationCoordIDTarget = instanceUtil.getReplicationCoordID(TargetBundleID,ua1.getFeedHelper());
+ // Poll up to 30 times, 20 s apart, until instance 0 of both replication coordinators reports SUCCEEDED.
+ for(int i = 0 ; i < 30 ; i++)
+ {
+ if(instanceUtil.getInstanceStatusFromCoord(ua1,replicationCoordIDTarget.get(0),0).toString().equals("SUCCEEDED") && instanceUtil.getInstanceStatusFromCoord(ua1,replicationCoordIDTarget.get(1),0).toString().equals("SUCCEEDED"))
+ break;
+ Thread.sleep(20000);
+ }
+ // NOTE(review): if the loop runs out without success the test continues anyway; nothing fails here.
+ Thread.sleep(15000);
+ // Collect the input folders of instance 0 of each replication coordinator.
+ ArrayList<String> inputFolderListForColo1= instanceUtil.getInputFoldersForInstanceForReplication(ua1,replicationCoordIDTarget.get(0),0);
+ ArrayList<String> inputFolderListForColo2= instanceUtil.getInputFoldersForInstanceForReplication(ua1,replicationCoordIDTarget.get(1),0);
+ // Put data into those folders on ua2 and ua3 respectively.
+ instanceUtil.putDataInFolders(ua2, inputFolderListForColo1);
+ instanceUtil.putDataInFolders(ua3, inputFolderListForColo2);
+
+
+
+ Util.print("folder list 1: "+inputFolderListForColo1.toString());
+ Util.print("folder list 2: "+inputFolderListForColo2.toString());
+ // NOTE(review): the test body ends after printing; there is no assertion on the outcome of the data added above.
+
+
+
+ }
+ // Cleanup deletes only b1's process, two feeds and cluster. NOTE(review): the b2 and b3 clusters submitted above are not deleted here.
+ finally{
+ prismHelper.getProcessHelper().delete(URLS.DELETE_URL,b1.getProcessData());
+ prismHelper.getFeedHelper().delete(URLS.DELETE_URL, b1.getDataSets().get(0));
+ prismHelper.getFeedHelper().delete(URLS.DELETE_URL, b1.getDataSets().get(1));
+ prismHelper.getClusterHelper().delete(URLS.DELETE_URL,b1.getClusters().get(0));
+
+ }
+ }
+
+
+
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/ReplicationResources/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/ReplicationResources/cluster-0.1.xml b/falcon-regression/merlin/src/test/resources/ReplicationResources/cluster-0.1.xml
new file mode 100644
index 0000000..cdb14ae
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/ReplicationResources/cluster-0.1.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<cluster colo="ua1" description="" name="corp" xmlns="uri:falcon:cluster:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <interfaces>
+ <interface type="readonly" endpoint="http://gs1001.grid.corp.inmobi.com:50070"
+ version="0.20.2" />
+ <interface type="write" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54310"
+ version="0.20.2" />
+ <interface type="execute" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54311" version="0.20.2" />
+ <interface type="workflow" endpoint="http://gs1001.grid.corp.inmobi.com:11000/oozie/"
+ version="3.1" />
+ <interface type="messaging" endpoint="tcp://gs1001.grid.corp.inmobi.com:61616?daemon=true"
+ version="5.1.6" />
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/ivory/staging" />
+ <location name="temp" path="/tmp" />
+ <location name="working" path="/projectsTest/ivory/working" />
+ </locations>
+ <properties>
+ <property name="field1" value="value1" />
+ <property name="field2" value="value2" />
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/ReplicationResources/feed-s4Replication.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/ReplicationResources/feed-s4Replication.xml b/falcon-regression/merlin/src/test/resources/ReplicationResources/feed-s4Replication.xml
new file mode 100644
index 0000000..96fa910
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/ReplicationResources/feed-s4Replication.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<feed name="iat-download-data-1" description="iat-download-data-1" xmlns="uri:falcon:feed:0.1">
+ <availabilityFlag>_SUCCESS</availabilityFlag>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2012-12-26T05:00Z" end="2034-12-20T08:00Z"/>
+ <retention limit="months(36)" action="delete"/>
+ <locations>
+ <location type="data" path="s4://inmobi-iat-data/userplatform/${YEAR}/${MONTH}/${DAY}/${HOUR}"/>
+ </locations>
+ </cluster>
+ </clusters>
+ <locations>
+ <location type="data" path="/projects/userplatform/iatdownload/${YEAR}/${MONTH}/${DAY}/${HOUR}"/>
+ </locations>
+ <ACL owner="rmcuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/ReplicationResources/id.pig
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/ReplicationResources/id.pig b/falcon-regression/merlin/src/test/resources/ReplicationResources/id.pig
new file mode 100644
index 0000000..11f227e
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/ReplicationResources/id.pig
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+A = load '$INPUT' using PigStorage(':'); -- read records from the $INPUT parameter, splitting fields on ':'
+B = foreach A generate $0 as id; -- keep only the first field of each record and name it id
+store B into '$OUTPUT' USING PigStorage(); -- write the ids to the $OUTPUT parameter using PigStorage's default delimiter
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/ReplicationResources/log4testng.properties
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/ReplicationResources/log4testng.properties b/falcon-regression/merlin/src/test/resources/ReplicationResources/log4testng.properties
new file mode 100644
index 0000000..fd6f966
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/ReplicationResources/log4testng.properties
@@ -0,0 +1,28 @@
+##
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# log4testng will log its own behavior (generally used for debugging this package only).
+ log4testng.debug=false
+
+ # Specifies the root Logger's logging level. Will log DEBUG level and above
+ log4testng.rootLogger=DEBUG
+
+ # The org.testng.reporters.EmailableReporter Logger will log TRACE level and above
+ log4testng.logger.org.testng.reporters.EmailableReporter=TRACE
+
+ # All Loggers in packages below org.testng will log INFO level and above
+ log4testng.logger.org.testng=INFO
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/ReplicationResources/process-agg.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/ReplicationResources/process-agg.xml b/falcon-regression/merlin/src/test/resources/ReplicationResources/process-agg.xml
new file mode 100644
index 0000000..edfc1af
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/ReplicationResources/process-agg.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="agregator-coord16" xmlns="uri:falcon:process:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+ <clusters>
+ <cluster name="corp">
+ <validity end="2011-01-03T03:00Z" start="2010-01-02T01:00Z" />
+ </cluster>
+ </clusters>
+ <parallel>1</parallel>
+ <order>FIFO</order>
+ <frequency>minutes(5)</frequency>
+ <timezone>UTC</timezone>
+
+ <inputs>
+ <input end="now(0,0)" start="now(0,-20)"
+ feed="raaw-logs16" name="inputData"/>
+ </inputs>
+ <outputs>
+ <output instance="now(0,0)" feed="agregated-logs16"
+ name="outputData" />
+ </outputs>
+ <properties>
+ <property name="queueName" value="default"/>
+
+ <property name="fileTime" value="${formatTime(dateOffset(instanceTime(), 1, 'DAY'), 'yyyy-MMM-dd')}"/>
+ <property name="user" value="${user()}"/>
+ <property name="baseTime" value="${today(0,0)}"/>
+
+ </properties>
+ <workflow path="/examples/apps/aggregator" />
+ <retry policy="periodic" delay="minutes(3)" attempts="3" />
+
+</process>
+
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/cluster-0.1.xml b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/cluster-0.1.xml
new file mode 100644
index 0000000..6a00752
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/cluster-0.1.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<cluster colo="gs" description="" name="corp" xmlns="uri:falcon:cluster:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <interfaces>
+ <interface type="readonly" endpoint="hftp://gs1001.grid.corp.inmobi.com:50070"
+ version="0.20.2" />
+ <interface type="write" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54310"
+ version="0.20.2" />
+ <interface type="execute" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54311" version="0.20.2" />
+ <interface type="workflow" endpoint="http://gs1001.grid.corp.inmobi.com:11000/oozie/"
+ version="3.1" />
+ <interface type="messaging" endpoint="tcp://gs1001.grid.corp.inmobi.com:61616?daemon=true"
+ version="5.1.6" />
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/ivory/staging" />
+ <location name="temp" path="/tmp" />
+ <location name="working" path="/projects/ivory/working" />
+ </locations>
+ <properties>
+ <property name="field1" value="value1" />
+ <property name="field2" value="value2" />
+ </properties>
+</cluster>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-hdfsoutputdir.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-hdfsoutputdir.xml b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-hdfsoutputdir.xml
new file mode 100644
index 0000000..d14a280
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-hdfsoutputdir.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<feed xmlns="uri:falcon:feed:0.1" name="outputhdfsdir" description="clicks log">
+ <partitions/>
+<!-- <groups>online,bi</groups>-->
+ <frequency>days(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)"/>
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2012-01-30T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="hours(6)" action="delete"/>
+ </cluster>
+ </clusters>
+ <locations>
+ <location type="meta" path="/projects/ivory/clicksMetaData"/>
+ <location type="stats" path="/projects/ivory/clicksStats"/>
+ <location type="data" path="/projects/bi/rmc/daily/AdvInteractionSummary/${YEAR}-${MONTH}-${DAY}"/>
+ </locations>
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+ <properties/>
+</feed>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-inpath.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-inpath.xml b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-inpath.xml
new file mode 100644
index 0000000..326830b
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-inpath.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<!--
+ Document : feed1.xml
+ Created on : February 14, 2012, 2:07 PM
+ Author : rishu.mehrotra
+ Description:
+ Purpose of the document follows.
+-->
+
+<feed description="clicks log" name="inPath" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<!-- <partitions>
+ <partition name="min" />
+ <partition name="fraud" />
+ <partition name="min" />
+ </partitions>
+
+ <groups>online,bi</groups>-->
+
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)" />
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2012-01-30T00:00Z" end="2099-03-31T23:59Z"/>
+ <retention limit="hours(10)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/retention/testFolders/${YEAR}-${MONTH}-${DAY}" />
+ <location type="stats" path="/projects/ivory/clicksStats" />
+ <location type="meta" path="/projects/ivory/clicksMetaData" />
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755" />
+ <schema location="/schema/clicks" provider="protobuf" />
+
+ <properties>
+ </properties>
+</feed>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-interpath.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-interpath.xml b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-interpath.xml
new file mode 100644
index 0000000..d3c0326
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/feed-interpath.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<feed xmlns="uri:falcon:feed:0.1" name="interPath" description="clicks log">
+ <partitions/>
+<!-- <groups>online,bi</groups>-->
+ <frequency>days(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)"/>
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2012-01-30T00:00Z" end="2099-01-31T23:59Z"/>
+ <retention limit="months(6)" action="delete"/>
+ </cluster>
+ </clusters>
+ <locations>
+ <location type="meta" path="/projects/ivory/clicksMetaData"/>
+ <location type="stats" path="/projects/ivory/clicksStats"/>
+ <location type="data" path="/projects/bi/ioout/interactions/${YEAR}-${MONTH}-${DAY}"/>
+ </locations>
+ <ACL owner="testuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+ <properties/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/process.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/process.xml b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/process.xml
new file mode 100644
index 0000000..0353c25
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetentionBundles/valid/bundle1/process.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="rm-coord" xmlns="uri:falcon:process:0.1">
+ <clusters>
+ <cluster name="corp">
+ <validity end="2012-01-31T00:00Z" start="2012-01-30T01:00Z"/>
+ </cluster>
+ </clusters>
+
+ <parallel>1</parallel>
+ <order>LIFO</order>
+ <frequency>days(1)</frequency>
+ <timezone>UTC</timezone>
+ <inputs>
+ <input end="now(2,0)" start="now(0,0)" feed="inPath" name="inPath" partition="*/OK"/>
+ </inputs>
+ <outputs>
+ <output instance="now(0,0)" feed="interPath" name="interPath" />
+ <output instance="now(0,0)" feed="outputhdfsdir" name="outputhdfsdir" />
+ </outputs>
+ <properties>
+ <property name="queueName" value="default"/>
+ <property name="logTime" value="${coord:formatTime(coord:actualTime(),'yyyy-MM-dd')}"/>
+ <property name="partition" value="*"/>
+ </properties>
+ <workflow path="/user/rishu/apps/rm"/>
+ <retry policy="periodic" delay="minutes(10)" attempts="3" />
+
+ <late-process policy="exp-backoff" delay="hours(1)">
+ <late-input input="impression" workflow-path="hdfs://impression/late/workflow" />
+ <late-input input="clicks" workflow-path="hdfs://clicks/late/workflow" />
+ </late-process>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/cluster-0.1.xml b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/cluster-0.1.xml
new file mode 100644
index 0000000..6a00752
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/cluster-0.1.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<cluster colo="gs" description="" name="corp" xmlns="uri:falcon:cluster:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <interfaces>
+ <interface type="readonly" endpoint="hftp://gs1001.grid.corp.inmobi.com:50070"
+ version="0.20.2" />
+ <interface type="write" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54310"
+ version="0.20.2" />
+ <interface type="execute" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54311" version="0.20.2" />
+ <interface type="workflow" endpoint="http://gs1001.grid.corp.inmobi.com:11000/oozie/"
+ version="3.1" />
+ <interface type="messaging" endpoint="tcp://gs1001.grid.corp.inmobi.com:61616?daemon=true"
+ version="5.1.6" />
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/ivory/staging" />
+ <location name="temp" path="/tmp" />
+ <location name="working" path="/projects/ivory/working" />
+ </locations>
+ <properties>
+ <property name="field1" value="value1" />
+ <property name="field2" value="value2" />
+ </properties>
+</cluster>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template1.xml b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template1.xml
new file mode 100644
index 0000000..e317d41
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template1.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log" name="raaw-logs16" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<!-- <partitions>
+ <partition name="fraud" />
+ <partition name="good" />
+ </partitions>
+
+ <groups>online,bi</groups>-->
+
+ <frequency>minutes(1)</frequency>
+ <late-arrival cut-off="hours(6)" />
+ <timezone>UTC</timezone>
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2009-01-01T00:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="months(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/lateDataTest/testFolders/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" />
+ <location type="stats" path="/projects/ivory/clicksStats" />
+ <location type="meta" path="/projects/ivory/clicksMetaData" />
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755" />
+ <schema location="/schema/clicks" provider="protobuf" />
+
+ <properties>
+ <property name="field3" value="value1" />
+ <property name="field4" value="value2" />
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template2.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template2.xml b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template2.xml
new file mode 100644
index 0000000..f2df5e6
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/feed-template2.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log" name="agregated-logs16" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<!-- <partitions>
+ <partition name="fraud" />
+ <partition name="good" />
+ </partitions>
+
+ <groups>online,bi</groups>-->
+
+ <frequency>minutes(1)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)" />
+
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2009-01-01T01:00Z" end="2099-12-31T23:59Z"/>
+ <retention limit="months(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <locations>
+ <location type="data" path="/lateDataTest/testFolders/output-data/megaPhail" />
+ <location type="stats" path="/projects/ivory/clicksStats" />
+ <location type="meta" path="/projects/ivory/clicksMetaData" />
+ </locations>
+
+ <ACL owner="testuser" group="group" permission="0x755" />
+ <schema location="/schema/clicks" provider="protobuf" />
+
+ <properties>
+ <property name="field5" value="value1" />
+ <property name="field6" value="value2" />
+ </properties>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/process-agg.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/process-agg.xml b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/process-agg.xml
new file mode 100644
index 0000000..03cb727
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/RetryTests/valid1/bundle1/process-agg.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="agregator-coord16" xmlns="uri:falcon:process:0.1">
+
+ <clusters>
+ <cluster name="corp">
+ <validity end="2010-01-01T01:03Z" start="2010-01-01T01:01Z" />
+ </cluster>
+ </clusters>
+ <parallel>2</parallel>
+ <order>FIFO</order>
+ <frequency>minutes(1)</frequency>
+ <timezone>UTC</timezone>
+
+ <inputs>
+ <input end="now(0,0)" start="now(0,-1)"
+ feed="raaw-logs16" name="inputData"/>
+ </inputs>
+ <outputs>
+ <output instance="now(0,0)" feed="agregated-logs16"
+ name="outputData" />
+ </outputs>
+ <properties>
+ <property name="queueName" value="default"/>
+ </properties>
+ <workflow path="/examples/apps/aggregator"/>
+ <retry policy="backoff" delay="minutes(10)" attempts="3" />
+
+ <late-process policy="exp-backoff" delay="hours(1)">
+ <late-input input="inputData" workflow-path="hdfs://impression/late/workflow" />
+ </late-process>
+</process>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/cluster-0.1.xsd
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/cluster-0.1.xsd b/falcon-regression/merlin/src/test/resources/cluster-0.1.xsd
new file mode 100644
index 0000000..a7b2750
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/cluster-0.1.xsd
@@ -0,0 +1,189 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" attributeFormDefault="unqualified" elementFormDefault="qualified"
+ targetNamespace="uri:falcon:cluster:0.1" xmlns="uri:falcon:cluster:0.1"
+ xmlns:jaxb="http://java.sun.com/xml/ns/jaxb" jaxb:version="2.1">
+ <xs:annotation>
+ <xs:documentation>
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version
+ 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ </xs:documentation>
+ <xs:appinfo>
+ <jaxb:schemaBindings>
+ <jaxb:package name="org.apache.falcon.entity.v0.cluster"/>
+ </jaxb:schemaBindings>
+ </xs:appinfo>
+ </xs:annotation>
+ <xs:element name="cluster" type="cluster">
+ </xs:element>
+ <xs:complexType name="cluster">
+ <xs:annotation>
+ <xs:documentation>The cluster contains the definition of different
+ interfaces which are used by Falcon like readonly, write, workflow and messaging.
+ A cluster is referenced by feeds and processes which are on-boarded
+ to Falcon by its name.
+ name: the name of cluster, which must be unique.
+ colo: the name of the colo to which this cluster belongs.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="KEY_VALUE_PAIR" name="tags" minOccurs="0">
+ <xs:annotation>
+ <xs:documentation>
+ tags: a cluster specifies an optional list of comma separated tags,
+ Key Value Pairs, separated by comma,
+ which is used for classification of clusters.
+ Example: consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting
+ </xs:documentation>
+ </xs:annotation>
+ </xs:element>
+ <xs:element type="interfaces" name="interfaces"/>
+ <xs:element type="locations" name="locations"/>
+ <xs:element type="properties" name="properties" minOccurs="0"/>
+ </xs:sequence>
+ <xs:attribute type="IDENTIFIER" name="name" use="required"/>
+ <xs:attribute type="xs:string" name="description"/>
+ <xs:attribute type="xs:string" name="colo" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="locations">
+ <xs:annotation>
+ <xs:documentation>A list of locations on cluster.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="location" name="location" maxOccurs="unbounded" minOccurs="1"/>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:complexType name="property">
+ <xs:annotation>
+ <xs:documentation>
+ A key-value pair, which are propagated to the
+ workflow engine.
+ Ideally the Mapred's job queue name (queueName) and
+ JMS impl class name of messaging engine (brokerImplClass)
+ should be defined here.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="xs:string" name="name" use="required"/>
+ <xs:attribute type="xs:string" name="value" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="interface">
+ <xs:annotation>
+ <xs:documentation>
+ An interface specifies the interface type, Falcon uses it to schedule
+ entities in workflow engine, to save and read data from hadoop and to
+ publish messages to messaging engine.
+ endpoint: is the url for each interface; examples: for write it is the
+ url of hdfs (fs.default.name) and
+ for workflow it is url of workflow engine like oozie.
+ version: The current runtime version of each interface.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="interfacetype" name="type" use="required"/>
+ <xs:attribute type="xs:string" name="endpoint" use="required"/>
+ <xs:attribute type="xs:string" name="version" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="properties">
+ <xs:annotation>
+ <xs:documentation>
+ A list of property elements.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="property" name="property" maxOccurs="unbounded" minOccurs="0"/>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:complexType name="location">
+ <xs:annotation>
+ <xs:documentation>
+ Location has the name and the path.
+ name: is the type of locations like
+ staging, temp and working.
+ path: the hdfs path for each location.
+ Falcon would use the location to do intermediate
+ processing of entities in hdfs and hence Falcon
+ should have read/write/execute permission on these
+ locations.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="IDENTIFIER" name="name" use="required"/>
+ <xs:attribute type="xs:string" name="path" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="interfaces">
+ <xs:annotation>
+ <xs:documentation>
+ A list of interfaces.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="interface" name="interface" maxOccurs="unbounded" minOccurs="3"/>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:simpleType name="interfacetype">
+ <xs:annotation>
+ <xs:documentation>
+ An interface has 6 different interface types: readonly, write,
+ execute, workflow, messaging, registry.
+ readonly specifies the hadoop's hftp address, its endpoint is the value of
+ dfs.http.address. ex: hftp://corp.namenode:50070/
+ write specifies the interface to write to hdfs, its endpoint is the value
+ of fs.default.name. ex: hdfs://corp.namenode:8020
+ execute specifies the interface for job tracker, its endpoint is the value
+ of mapred.job.tracker. ex: corp.jt:8021
+ workflow specifies the interface for workflow engine, example of its
+ endpoint is value for OOZIE_URL. ex: http://corp.oozie:11000/oozie
+ messaging specifies the interface for sending feed availability messages, its
+ endpoint is broker url with tcp address. ex: tcp://corp.messaging:61616?daemon=true
+ registry specifies the interface for Hcatalog.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="readonly"/>
+ <xs:enumeration value="write"/>
+ <xs:enumeration value="execute"/>
+ <xs:enumeration value="workflow"/>
+ <xs:enumeration value="messaging"/>
+ <xs:enumeration value="registry"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="IDENTIFIER">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="(([a-zA-Z]([\-a-zA-Z0-9])*){1,39})"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="KEY_VALUE_PAIR">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="(\w+=[^,]+)?([,]?[ ]*[\w]+=[^,]+)*"/>
+ </xs:restriction>
+ </xs:simpleType>
+</xs:schema>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/feed-0.1.xsd
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/feed-0.1.xsd b/falcon-regression/merlin/src/test/resources/feed-0.1.xsd
new file mode 100644
index 0000000..00b5172
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/feed-0.1.xsd
@@ -0,0 +1,375 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" attributeFormDefault="unqualified" elementFormDefault="qualified"
+ targetNamespace="uri:falcon:feed:0.1" xmlns="uri:falcon:feed:0.1"
+ xmlns:jaxb="http://java.sun.com/xml/ns/jaxb" jaxb:version="2.1">
+
+ <xs:annotation>
+ <xs:documentation>
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for
+ additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version
+ 2.0
+ (the "License"); you may not use this file
+ except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in
+ writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied.
+ See the License
+ for the specific language governing permissions and
+ limitations under the License.
+ </xs:documentation>
+ <xs:appinfo>
+ <jaxb:schemaBindings>
+ <jaxb:package name="org.apache.falcon.entity.v0.feed"/>
+ </jaxb:schemaBindings>
+ </xs:appinfo>
+ </xs:annotation>
+
+ <xs:element name="feed" type="feed">
+ </xs:element>
+ <xs:complexType name="feed">
+ <xs:annotation>
+ <xs:documentation>
+ name: A feed should have a unique name and this name is referenced
+ by processes as input or output feed.
+ tags: a feed specifies an optional list of comma separated tags
+ which is used for classification of data sets.
+ groups: a feed specifies a list of comma separated groups,
+ a group is a logical grouping of feeds and a group is said to be
+ available if all the feeds belonging to a group are available.
+ The frequency of all
+ the feed which belong to the same group
+ must be same.
+ availabilityFlag: specifies the name of a file which when
+ present/created
+ in a feeds data directory, the feed is
+ termed as available. ex: _SUCCESS, if
+ this element is ignored then Falcon would consider the presence of feed's
+ data directory as feed availability.
+ A feed has a
+ frequency and a periodicity which specifies the frequency by which
+ this feed is generated. ex: it can be generated every hour, every 5 minutes, daily, weekly etc.
+ valid frequency type for a feed are minutes, hours, days, months.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="KEY_VALUE_PAIR" name="tags" minOccurs="0">
+ <xs:annotation>
+ <xs:documentation>
+ tags: a feed specifies an optional list of comma separated tags,
+ Key Value Pairs, separated by comma,
+ which is used for classification of data sets.
+ Example: consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting
+ </xs:documentation>
+ </xs:annotation>
+ </xs:element>
+ <xs:element type="partitions" name="partitions" minOccurs="0"/>
+ <xs:element type="group-type" name="groups" minOccurs="0"/>
+ <xs:element type="xs:string" name="availabilityFlag" minOccurs="0"/>
+ <xs:element type="frequency-type" name="frequency"/>
+ <xs:element name="timezone" minOccurs="0" default="UTC">
+ <xs:simpleType>
+ <xs:annotation>
+ <xs:appinfo>
+ <jaxb:javaType name="java.util.TimeZone" parseMethod="java.util.TimeZone.getTimeZone"
+ printMethod="org.apache.falcon.entity.v0.SchemaHelper.getTimeZoneId"/>
+ </xs:appinfo>
+ </xs:annotation>
+ <xs:restriction base="xs:string"/>
+ </xs:simpleType>
+ </xs:element>
+ <xs:element type="late-arrival" name="late-arrival" minOccurs="0"/>
+ <xs:element type="clusters" name="clusters"/>
+ <xs:choice minOccurs="1" maxOccurs="1">
+ <xs:element type="locations" name="locations"/>
+ <xs:element type="catalog-table" name="table"/>
+ </xs:choice>
+ <xs:element type="ACL" name="ACL"/>
+ <xs:element type="schema" name="schema"/>
+ <xs:element type="properties" name="properties" minOccurs="0"/>
+ </xs:sequence>
+ <xs:attribute type="IDENTIFIER" name="name" use="required"/>
+ <xs:attribute type="xs:string" name="description"/>
+ </xs:complexType>
+
+ <xs:complexType name="cluster">
+ <xs:annotation>
+ <xs:documentation>
+ Feed references a cluster by its name, before submitting a feed all the
+ referenced cluster should be submitted to Falcon.
+ type: specifies whether the
+ referenced cluster should be treated as a
+ source or target for a feed.
+ Validity of a feed on cluster specifies duration for which this feed is
+ valid on this cluster.
+ Retention specifies how long the feed is retained on this cluster and the
+ action to be taken on the feed after the expiry of retention period.
+ The retention limit is
+ specified by expression frequency(times), ex: if
+ feed should be retained for at least 6 hours then retention's limit="hours(6)".
+ The field partitionExp contains
+ partition tags. Number of partition tags has to be equal to number of partitions specified in feed
+ schema.
+ A partition tag can be a wildcard(*), a static string or
+ an expression. At least one of the strings has to be an expression.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="validity" name="validity"/>
+ <xs:element type="retention" name="retention"/>
+ <xs:choice minOccurs="0" maxOccurs="1">
+ <xs:element type="locations" name="locations" minOccurs="0"/>
+ <xs:element type="catalog-table" name="table"/>
+ </xs:choice>
+ </xs:sequence>
+ <xs:attribute type="IDENTIFIER" name="name" use="required"/>
+ <xs:attribute type="cluster-type" name="type" use="optional"/>
+ <xs:attribute type="xs:string" name="partition" use="optional"/>
+ <xs:attribute type="frequency-type" name="delay" use="optional" />
+ </xs:complexType>
+ <xs:complexType name="partitions">
+ <xs:annotation>
+ <xs:documentation>
+ A list of partition, which is the logical partition of a feed and this
+ is maintained in Hcatalog registry.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="partition" name="partition" maxOccurs="unbounded" minOccurs="0"/>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:complexType name="schema">
+ <xs:annotation>
+ <xs:documentation>A schema specifies the location of a schema file
+ for a feed and the provider of schema like protobuf, thrift etc.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="xs:string" name="location" use="required"/>
+ <xs:attribute type="xs:string" name="provider" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="properties">
+ <xs:annotation>
+ <xs:documentation>
+ A list of name-value pair of property.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="property" name="property" maxOccurs="unbounded" minOccurs="0"/>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:complexType name="validity">
+ <xs:annotation>
+ <xs:documentation>
+ A validity has a start, which is the validity start date and end the
+ validity
+ end date. ex: start="2011-11-01T00:00Z" in TZ format.
+ timezone can be UTC,
+ GMT.
+ Processes referring this feed would consider the validity period for
+ validation.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="date-time-type" name="start" use="required"/>
+ <xs:attribute type="date-time-type" name="end" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="locations">
+ <xs:annotation>
+ <xs:documentation>
+ A list of locations on the file system.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:choice maxOccurs="unbounded" minOccurs="0">
+ <xs:element type="location" name="location"/>
+ </xs:choice>
+ </xs:complexType>
+ <xs:complexType name="late-arrival">
+ <xs:annotation>
+ <xs:documentation>
+ late-arrival specifies the cut-off period till which the feed is
+ expected to arrive late and should be honored by processes referring
+ to it as input
+ feed by rerunning the instances in case
+ the data arrives late within a cut-off period.
+ The cut-off period is specified by expression
+ frequency(times), ex: if the feed
+ can arrive late
+ up to 8 hours then late-arrival's cut-off="hours(8)"
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="frequency-type" name="cut-off" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="property">
+ <xs:annotation>
+ <xs:documentation>
+ A key-value pair, which are propagated to the
+ workflow engine.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="xs:string" name="name" use="required"/>
+ <xs:attribute type="xs:string" name="value" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="clusters">
+ <xs:annotation>
+ <xs:documentation>
+ A list of clusters.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:sequence>
+ <xs:element type="cluster" name="cluster" maxOccurs="unbounded" minOccurs="1">
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:complexType name="retention">
+ <xs:attribute type="retention-type" name="type" default="instance"/>
+ <xs:attribute type="frequency-type" name="limit" use="required"/>
+ <xs:attribute type="action-type" name="action" use="required"/>
+ </xs:complexType>
+ <xs:simpleType name="retention-type">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="instance"/>
+ <!-- <xs:enumeration value="age" /> -->
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:complexType name="location">
+ <xs:annotation>
+ <xs:documentation>
+ location specifies the type of location like data, meta, stats
+ and the corresponding paths for them.
+ A feed should at least define the location for type
+ data, which
+ specifies the HDFS path pattern where the feed is generated
+ periodically. ex: type="data" path="/projects/TrafficHourly/${YEAR}-${MONTH}-${DAY}/traffic"
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="location-type" name="type" use="required"/>
+ <xs:attribute type="xs:string" name="path" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="partition">
+ <xs:attribute type="IDENTIFIER" name="name" use="required"/>
+ </xs:complexType>
+ <xs:complexType name="ACL">
+ <xs:annotation>
+ <xs:documentation>
+ Access control list for this feed.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="xs:string" name="owner"/>
+ <xs:attribute type="xs:string" name="group"/>
+ <xs:attribute type="xs:string" name="permission"/>
+ </xs:complexType>
+ <xs:simpleType name="action-type">
+ <xs:restriction base="xs:string">
+ <xs:annotation>
+ <xs:documentation>
+ action type specifies the action that should be taken on a feed
+ when the retention period of a feed expires on a cluster,
+ the valid
+ actions are
+ archive, delete, chown and chmod.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:enumeration value="archive"/>
+ <xs:enumeration value="delete"/>
+ <xs:enumeration value="chown"/>
+ <xs:enumeration value="chmod"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="cluster-type">
+ <xs:annotation>
+ <xs:documentation>
+ The clusters on feed can be either defined as source or target,
+ a feed
+ should at least have one source cluster defined.
+ the target clusters
+ are used for
+ replication of feed.
+ </xs:documentation>
+ </xs:annotation>
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="source"/>
+ <xs:enumeration value="target"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="location-type">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="data"/>
+ <xs:enumeration value="stats"/>
+ <xs:enumeration value="meta"/>
+ <xs:enumeration value="tmp"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="IDENTIFIER">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="(([a-zA-Z]([\-a-zA-Z0-9])*){1,39})"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="frequency-type">
+ <xs:annotation>
+ <xs:appinfo>
+ <jaxb:javaType name="org.apache.falcon.entity.v0.Frequency"
+ parseMethod="org.apache.falcon.entity.v0.Frequency.fromString"
+ printMethod="org.apache.falcon.entity.v0.Frequency.toString"/>
+ </xs:appinfo>
+ </xs:annotation>
+ <xs:restriction base="xs:string">
+ <xs:pattern value="(minutes|hours|days|months)\([1-9]\d*\)"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="date-time-type">
+ <xs:annotation>
+ <xs:appinfo>
+ <jaxb:javaType name="java.util.Date" parseMethod="org.apache.falcon.entity.v0.SchemaHelper.parseDateUTC"
+ printMethod="org.apache.falcon.entity.v0.SchemaHelper.formatDateUTC"/>
+ </xs:appinfo>
+ </xs:annotation>
+ <xs:restriction base="xs:string">
+ <xs:pattern
+ value="((19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])T([0-1][0-9]|2[0-3]):([0-5][0-9]))Z"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="group-type">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="(\w+(,\w+)*)"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:simpleType name="KEY_VALUE_PAIR">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="(\w+=[^,]+)?([,]?[ ]*[\w]+=[^,]+)*"/>
+ </xs:restriction>
+ </xs:simpleType>
+ <xs:complexType name="catalog-table">
+ <xs:annotation>
+ <xs:documentation>
+ catalog specifies the uri of a Hive table along with the partition spec.
+ uri="catalog:$database:$table#(partition-key=partition-value);+"
+ Example: catalog:logs-db:clicks#ds=${YEAR}-${MONTH}-${DAY}
+ </xs:documentation>
+ </xs:annotation>
+ <xs:attribute type="xs:string" name="uri" use="required"/>
+ </xs:complexType>
+</xs:schema>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/feed-s4Replication.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/feed-s4Replication.xml b/falcon-regression/merlin/src/test/resources/feed-s4Replication.xml
new file mode 100644
index 0000000..4e7d4c3
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/feed-s4Replication.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<feed name="iat-download-data-1" description="iat-download-data-1" xmlns="uri:ivory:feed:0.1">
+ <availabilityFlag>_SUCCESS</availabilityFlag>
+ <frequency>hours(1)</frequency>
+ <timezone>UTC</timezone>
+ <clusters>
+ <cluster name="ua2-ruby" type="source">
+ <validity start="2012-12-26T05:00Z" end="2034-12-20T08:00Z"/>
+ <retention limit="months(36)" action="delete"/>
+ <locations>
+ <location type="data" path="s4://inmobi-iat-data/userplatform/${YEAR}/${MONTH}/${DAY}/${HOUR}"/>
+ </locations>
+ </cluster>
+ <cluster name="prod-global" type="target">
+ <validity start="2012-12-24T10:00Z" end="2034-12-20T08:00Z"/>
+ <retention limit="days(4)" action="delete"/>
+ </cluster>
+ </clusters>
+ <locations>
+ <location type="data" path="/projects/userplatform/iatdownload/${YEAR}/${MONTH}/${DAY}/${HOUR}"/>
+ </locations>
+ <ACL owner="rmcuser" group="group" permission="0x755"/>
+ <schema location="/schema/clicks" provider="protobuf"/>
+</feed>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/cluster-0.1.xml b/falcon-regression/merlin/src/test/resources/hcat/cluster-0.1.xml
new file mode 100644
index 0000000..5396835
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/cluster-0.1.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<cluster colo="ua1" description="" name="corp" xmlns="uri:falcon:cluster:0.1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <interfaces>
+ <interface type="readonly" endpoint="http://gs1001.grid.corp.inmobi.com:50070"
+ version="0.20.2" />
+ <interface type="write" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54310"
+ version="0.20.2" />
+ <interface type="execute" endpoint="hdfs://gs1001.grid.corp.inmobi.com:54311"
+ version="0.20.2" />
+ <interface type="workflow"
+ endpoint="http://gs1001.grid.corp.inmobi.com:11000/oozie/" version="3.1" />
+ <interface type="messaging"
+ endpoint="tcp://gs1001.grid.corp.inmobi.com:61616?daemon=true"
+ version="5.1.6" />
+ <interface type="registry" endpoint="thrift://10.14.118.32:14003"
+ version="0.11.0" />
+
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/ivory/staging" />
+ <location name="temp" path="/tmp" />
+ <location name="working" path="/projectsTest/ivory/working" />
+ </locations>
+ <properties>
+ <property name="hive.metastore.client.socket.timeout" value="120"/>
+ <property name="field1" value="value1" />
+ <property name="field2" value="value2" />
+ </properties>
+</cluster>
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/data/_SUCCESS
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/data/_SUCCESS b/falcon-regression/merlin/src/test/resources/hcat/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/data/data.txt
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/data/data.txt b/falcon-regression/merlin/src/test/resources/hcat/data/data.txt
new file mode 100644
index 0000000..4321d67
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/data/data.txt
@@ -0,0 +1,3 @@
+key1val1
+key2val2
+key3val3
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/feed-template1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/feed-template1.xml b/falcon-regression/merlin/src/test/resources/hcat/feed-template1.xml
new file mode 100644
index 0000000..2710ea7
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/feed-template1.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log" name="raaw-logs16" xmlns="uri:falcon:feed:0.1"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+
+ <frequency>minutes(20)</frequency>
+ <timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)" />
+
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z" />
+ <retention limit="months(9000)" action="delete" /> <!-- limit may be a time span or an instance count (e.g. 100); action is an enum: DELETE or ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <table
+ uri="catalog:default:mytablepart3#year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR}" />
+
+ <ACL owner="testuser" group="group" permission="0x755" />
+ <schema location="hcat" provider="hcat" />
+
+ <properties>
+ <property name="field1" value="value1" />
+ <property name="field2" value="value2" />
+ </properties>
+</feed>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/feed-template2.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/feed-template2.xml b/falcon-regression/merlin/src/test/resources/hcat/feed-template2.xml
new file mode 100644
index 0000000..e189bc4
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/feed-template2.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<feed description="clicks log" name="agregated-logs16" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<!-- <partitions>
+ <partition name="fraud" />
+ <partition name="good" />
+ </partitions>
+
+ <groups>online,bi</groups>-->
+
+ <frequency>hours(1)</frequency>
+<timezone>UTC</timezone>
+ <late-arrival cut-off="hours(6)" />
+
+ <clusters>
+ <cluster name="corp" type="source">
+ <validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z"
+ />
+ <retention limit="hours(6)" action="delete" /> <!-- limit may be a time span or an instance count (e.g. 100); action is an enum: DELETE or ARCHIVE -->
+ </cluster>
+ </clusters>
+
+ <table uri="catalog:default:output_table#year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR}" />
+
+ <ACL owner="testuser" group="group" permission="0x755" />
+ <schema location="hcat" provider="hcat" />
+
+ <properties>
+ <property name="field1" value="value1" />
+ <property name="field2" value="value2" />
+ </properties>
+</feed>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/hcat-process.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/hcat-process.xml b/falcon-regression/merlin/src/test/resources/hcat/hcat-process.xml
new file mode 100644
index 0000000..2701b26
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/hcat-process.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="agregator-coord16" xmlns="uri:falcon:process:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+ <clusters>
+ <cluster name="corp">
+ <validity end="2011-01-03T03:00Z" start="2010-01-02T01:00Z" />
+ </cluster>
+ </clusters>
+ <parallel>1</parallel>
+ <order>FIFO</order>
+ <frequency>minutes(5)</frequency>
+ <timezone>UTC</timezone>
+
+ <inputs>
+ <input end="now(0,0)" start="now(0,-20)"
+ feed="raaw-logs16" name="inputData"/>
+ </inputs>
+ <outputs>
+ <output instance="now(0,0)" feed="agregated-logs16"
+ name="outputData" />
+ </outputs>
+ <properties>
+ <property name="queueName" value="default"/>
+ </properties>
+ <workflow path="/examples/apps/aggregator"/>
+ <retry policy="periodic" delay="minutes(3)" attempts="3" />
+
+</process>
+
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/hivescript/script.hql
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/hivescript/script.hql b/falcon-regression/merlin/src/test/resources/hcat/hivescript/script.hql
new file mode 100644
index 0000000..014270d
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/hivescript/script.hql
@@ -0,0 +1,19 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- NOTE(review): ${falcon_inputData_filter} is substituted both as the PARTITION spec and as the WHERE predicate; confirm the generated filter text is valid in both positions.
+INSERT OVERWRITE TABLE ${falcon_outputData_database}.${falcon_outputData_table} PARTITION ${falcon_inputData_filter} SELECT id, value FROM ${falcon_inputData_database}.${falcon_inputData_table} WHERE ${falcon_inputData_filter};
http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/bdcf001f/falcon-regression/merlin/src/test/resources/hcat/hivescript/script_non_hcat_input.hql
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/hcat/hivescript/script_non_hcat_input.hql b/falcon-regression/merlin/src/test/resources/hcat/hivescript/script_non_hcat_input.hql
new file mode 100644
index 0000000..935d6dc
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/hcat/hivescript/script_non_hcat_input.hql
@@ -0,0 +1,27 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+DROP TABLE IF EXISTS ${falcon_outputData_database}.temp_table_on_raw_data;
+-- Expose the raw files at ${inputData} as an external two-column staging table in the output database.
+CREATE EXTERNAL TABLE ${falcon_outputData_database}.temp_table_on_raw_data(id STRING, value STRING)
+ LOCATION '${inputData}';
+-- Read the staging table by its qualified name: it lives in the output database, which need not be the session's current database.
+INSERT OVERWRITE TABLE ${falcon_outputData_database}.${falcon_outputData_table}
+ PARTITION (dt='${falcon_outputData_dated_partition_value}')
+ SELECT id, value FROM ${falcon_outputData_database}.temp_table_on_raw_data;
+-- Clean up the staging table; it is EXTERNAL, so dropping it should leave the files under its LOCATION untouched.
+DROP TABLE IF EXISTS ${falcon_outputData_database}.temp_table_on_raw_data;
\ No newline at end of file