You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by ba...@apache.org on 2016/07/08 20:31:07 UTC

falcon git commit: FALCON-2030 Enforce time partition in the storage path for feeds with import and export policy

Repository: falcon
Updated Branches:
  refs/heads/master 91f7c817c -> c1d37bfab


FALCON-2030 Enforce time partition in the storage path for feeds with import and export policy

Enforce time partition in the storage path for feeds with import and export policy.
For HCAT storage types, this restriction is already handled.

Author: Venkatesan Ramachandran <vr...@hortonworks.com>

Reviewers: "Ajay Yadava <aj...@apache.org>,Ying Zheng <yz...@hortonworks.com>"

Closes #214 from vramachan/FALCON-2030.EnforceTimePartition


Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/c1d37bfa
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/c1d37bfa
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/c1d37bfa

Branch: refs/heads/master
Commit: c1d37bfab3cae98a61cbe924786a0335e13135dc
Parents: 91f7c81
Author: Venkatesan Ramachandran <vr...@hortonworks.com>
Authored: Fri Jul 8 13:30:22 2016 -0700
Committer: bvellanki <bv...@hortonworks.com>
Committed: Fri Jul 8 13:30:22 2016 -0700

----------------------------------------------------------------------
 .../falcon/entity/parser/FeedEntityParser.java  | 14 ++++
 .../entity/parser/FeedEntityParserTest.java     | 18 +++++
 .../resources/config/feed/feed-import-0.1.xml   |  2 +-
 .../feed/feed-import-exclude-fields-0.1.xml     |  4 +-
 .../config/feed/feed-import-invalid-0.1.xml     |  4 +-
 .../feed-import-invalid-storage-path-0.1.xml    | 73 ++++++++++++++++++++
 .../feed/feed-import-no-timepartition-0.1.xml   | 73 ++++++++++++++++++++
 .../config/feed/feed-import-noargs-0.1.xml      |  4 +-
 docs/src/site/twiki/EntitySpecification.twiki   |  3 +
 docs/src/site/twiki/ImportExport.twiki          |  3 +
 10 files changed, 191 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/main/java/org/apache/falcon/entity/parser/FeedEntityParser.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/entity/parser/FeedEntityParser.java b/common/src/main/java/org/apache/falcon/entity/parser/FeedEntityParser.java
index 28fdaf8..6b72174 100644
--- a/common/src/main/java/org/apache/falcon/entity/parser/FeedEntityParser.java
+++ b/common/src/main/java/org/apache/falcon/entity/parser/FeedEntityParser.java
@@ -27,6 +27,7 @@ import org.apache.falcon.entity.EntityUtil;
 import org.apache.falcon.entity.FeedHelper;
 import org.apache.falcon.entity.FileSystemStorage;
 import org.apache.falcon.entity.Storage;
+import org.apache.falcon.entity.common.FeedDataPath;
 import org.apache.falcon.entity.store.ConfigurationStore;
 import org.apache.falcon.entity.v0.Entity;
 import org.apache.falcon.entity.v0.EntityGraph;
@@ -65,6 +66,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TimeZone;
+import java.util.regex.Matcher;
 
 /**
  * Parser that parses feed entity definition.
@@ -637,9 +639,21 @@ public class FeedEntityParser extends EntityParser<Feed> {
                     + "but it doesn't contain location type - data in cluster " + cluster.getName());
             }
 
+            // storage location needs to have time partition if import or export is enabled.
+            if (FeedHelper.isImportEnabled(cluster) || FeedHelper.isExportEnabled(cluster)) {
+                if (!matchStoragePathPattern(dataLocation.getPath())) {
+                    throw new ValidationException(String.format("Feed %s with Import/Export policy "
+                            + "needs to have time partition in the storage location path", feed.getName()));
+                }
+            }
         }
     }
 
+    private boolean matchStoragePathPattern(String feedBasePath) {
+        Matcher matcher = FeedDataPath.PATTERN.matcher(feedBasePath);
+        return matcher.find();
+    }
+
     /**
      * Validate extraction and merge type combination. Currently supported combo:
      *

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java b/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
index f9aad19..ced4fc5 100644
--- a/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
+++ b/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
@@ -1161,6 +1161,24 @@ public class FeedEntityParserTest extends AbstractTestBase {
         Assert.fail("ValidationException should have been thrown");
     }
 
+    @Test (expectedExceptions = {ValidationException.class})
+    public void testImportFeedWithNoTimePartition() throws Exception {
+
+        InputStream feedStream = this.getClass()
+                .getResourceAsStream("/config/feed/feed-import-no-timepartition-0.1.xml");
+        parser.parseAndValidate(feedStream);
+        Assert.fail("ValidationException should have been thrown");
+    }
+
+    @Test (expectedExceptions = {ValidationException.class})
+    public void testImportFeedWithInvalidTimePartition() throws Exception {
+
+        InputStream feedStream = this.getClass()
+                .getResourceAsStream("/config/feed/feed-import-invalid-storage-path-0.1.xml");
+        parser.parseAndValidate(feedStream);
+        Assert.fail("ValidationException should have been thrown");
+    }
+
     public void testValidateEmailNotification() throws Exception {
         Feed feedNotification = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(
                 (FeedEntityParserTest.class.getResourceAsStream(FEED_XML)));

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/resources/config/feed/feed-import-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-import-0.1.xml b/common/src/test/resources/config/feed/feed-import-0.1.xml
index 69f7ede..20489c9 100644
--- a/common/src/test/resources/config/feed/feed-import-0.1.xml
+++ b/common/src/test/resources/config/feed/feed-import-0.1.xml
@@ -59,7 +59,7 @@
     </clusters>
 
     <locations>
-        <location type="data" path="/projects/falcon/clicks"/>
+        <location type="data" path="/projects/falcon/clicks/${YEAR}-${MONTH}"/>
         <location type="stats" path="/projects/falcon/clicksStats"/>
         <location type="meta" path="/projects/falcon/clicksMetaData"/>
     </locations>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/resources/config/feed/feed-import-exclude-fields-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-import-exclude-fields-0.1.xml b/common/src/test/resources/config/feed/feed-import-exclude-fields-0.1.xml
index 5a6fcd9..03518d9 100644
--- a/common/src/test/resources/config/feed/feed-import-exclude-fields-0.1.xml
+++ b/common/src/test/resources/config/feed/feed-import-exclude-fields-0.1.xml
@@ -56,7 +56,7 @@
                 </arguments>
             </import>
             <locations>
-                <location type="data" path="/projects/falcon/clicks"/>
+                <location type="data" path="/projects/falcon/clicks/${MONTH}/click1"/>
                 <location type="stats" path="/projects/falcon/clicksStats"/>
                 <location type="meta" path="/projects/falcon/clicksMetaData"/>
             </locations>
@@ -64,7 +64,7 @@
     </clusters>
 
     <locations>
-        <location type="data" path="/projects/falcon/clicks"/>
+        <location type="data" path="/projects/falcon/clicks/${MONTH}/click1"/>
         <location type="stats" path="/projects/falcon/clicksStats"/>
         <location type="meta" path="/projects/falcon/clicksMetaData"/>
     </locations>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/resources/config/feed/feed-import-invalid-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-import-invalid-0.1.xml b/common/src/test/resources/config/feed/feed-import-invalid-0.1.xml
index 9428bce..900e0f0 100644
--- a/common/src/test/resources/config/feed/feed-import-invalid-0.1.xml
+++ b/common/src/test/resources/config/feed/feed-import-invalid-0.1.xml
@@ -55,7 +55,7 @@
                 </arguments>
             </import>
             <locations>
-                <location type="data" path="/projects/falcon/clicks"/>
+                <location type="data" path="/projects/falcon/clicks/${YEAR}"/>
                 <location type="stats" path="/projects/falcon/clicksStats"/>
                 <location type="meta" path="/projects/falcon/clicksMetaData"/>
             </locations>
@@ -63,7 +63,7 @@
     </clusters>
 
     <locations>
-        <location type="data" path="/projects/falcon/clicks"/>
+        <location type="data" path="/projects/falcon/clicks/${YEAR}"/>
         <location type="stats" path="/projects/falcon/clicksStats"/>
         <location type="meta" path="/projects/falcon/clicksMetaData"/>
     </locations>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/resources/config/feed/feed-import-invalid-storage-path-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-import-invalid-storage-path-0.1.xml b/common/src/test/resources/config/feed/feed-import-invalid-storage-path-0.1.xml
new file mode 100644
index 0000000..d589bb9
--- /dev/null
+++ b/common/src/test/resources/config/feed/feed-import-invalid-storage-path-0.1.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="Customer data" name="CustomerFeed" xmlns="uri:falcon:feed:0.1">
+    <tags>consumer=consumer@xyz.com, owner=producer@xyz.com, _department_type=forecasting</tags>
+    <partitions>
+        <partition name="fraud"/>
+        <partition name="good"/>
+    </partitions>
+
+    <groups>online,bi</groups>
+    <availabilityFlag>_SUCCESS</availabilityFlag>
+
+    <frequency>hours(1)</frequency>
+    <sla slaLow="hours(2)" slaHigh="hours(3)"/>
+    <timezone>UTC</timezone>
+
+    <late-arrival cut-off="hours(6)"/>
+
+    <clusters>
+        <cluster name="testCluster" type="source">
+            <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
+            <retention limit="hours(48)" action="delete"/>
+            <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+            <sla slaLow="hours(3)" slaHigh="hours(4)"/>
+            <import>
+                <source name="test-hsql-db" tableName="customer">
+                    <extract type="full">
+                        <mergepolicy>snapshot</mergepolicy>
+                    </extract>
+                    <fields>
+                        <includes>
+                            <field>id</field>
+                            <field>name</field>
+                        </includes>
+                    </fields>
+                </source>
+                <arguments>
+                    <argument name="--num-mappers" value="2"/>
+                </arguments>
+            </import>
+            <locations>
+                <location type="data" path="/projects/falcon/clicks/${INVALID}"/>
+                <location type="stats" path="/projects/falcon/clicksStats"/>
+                <location type="meta" path="/projects/falcon/clicksMetaData"/>
+            </locations>
+        </cluster>
+    </clusters>
+
+    <locations>
+        <location type="data" path="/projects/falcon/clicks/${INVALID}"/>
+        <location type="stats" path="/projects/falcon/clicksStats"/>
+        <location type="meta" path="/projects/falcon/clicksMetaData"/>
+    </locations>
+
+    <ACL owner="testuser" group="group" permission="0x755"/>
+    <schema location="/schema/clicks" provider="protobuf"/>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/resources/config/feed/feed-import-no-timepartition-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-import-no-timepartition-0.1.xml b/common/src/test/resources/config/feed/feed-import-no-timepartition-0.1.xml
new file mode 100644
index 0000000..9428bce
--- /dev/null
+++ b/common/src/test/resources/config/feed/feed-import-no-timepartition-0.1.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="Customer data" name="CustomerFeed" xmlns="uri:falcon:feed:0.1">
+    <tags>consumer=consumer@xyz.com, owner=producer@xyz.com, _department_type=forecasting</tags>
+    <partitions>
+        <partition name="fraud"/>
+        <partition name="good"/>
+    </partitions>
+
+    <groups>online,bi</groups>
+    <availabilityFlag>_SUCCESS</availabilityFlag>
+
+    <frequency>hours(1)</frequency>
+    <sla slaLow="hours(2)" slaHigh="hours(3)"/>
+    <timezone>UTC</timezone>
+
+    <late-arrival cut-off="hours(6)"/>
+
+    <clusters>
+        <cluster name="testCluster" type="source">
+            <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
+            <retention limit="hours(48)" action="delete"/>
+            <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+            <sla slaLow="hours(3)" slaHigh="hours(4)"/>
+            <import>
+                <source name="test-hsql-db" tableName="customer">
+                    <extract type="full">
+                        <mergepolicy>snapshot</mergepolicy>
+                    </extract>
+                    <fields>
+                        <includes>
+                            <field>id</field>
+                            <field>name</field>
+                        </includes>
+                    </fields>
+                </source>
+                <arguments>
+                    <argument name="--num-mappers" value="2"/>
+                </arguments>
+            </import>
+            <locations>
+                <location type="data" path="/projects/falcon/clicks"/>
+                <location type="stats" path="/projects/falcon/clicksStats"/>
+                <location type="meta" path="/projects/falcon/clicksMetaData"/>
+            </locations>
+        </cluster>
+    </clusters>
+
+    <locations>
+        <location type="data" path="/projects/falcon/clicks"/>
+        <location type="stats" path="/projects/falcon/clicksStats"/>
+        <location type="meta" path="/projects/falcon/clicksMetaData"/>
+    </locations>
+
+    <ACL owner="testuser" group="group" permission="0x755"/>
+    <schema location="/schema/clicks" provider="protobuf"/>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/common/src/test/resources/config/feed/feed-import-noargs-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-import-noargs-0.1.xml b/common/src/test/resources/config/feed/feed-import-noargs-0.1.xml
index c96249c..2a36283 100644
--- a/common/src/test/resources/config/feed/feed-import-noargs-0.1.xml
+++ b/common/src/test/resources/config/feed/feed-import-noargs-0.1.xml
@@ -46,7 +46,7 @@
                 </source>
             </import>
             <locations>
-                <location type="data" path="/projects/falcon/clicks"/>
+                <location type="data" path="/projects/falcon/clicks/${YEAR}"/>
                 <location type="stats" path="/projects/falcon/clicksStats"/>
                 <location type="meta" path="/projects/falcon/clicksMetaData"/>
             </locations>
@@ -54,7 +54,7 @@
     </clusters>
 
     <locations>
-        <location type="data" path="/projects/falcon/clicks"/>
+        <location type="data" path="/projects/falcon/clicks/${YEAR}"/>
         <location type="stats" path="/projects/falcon/clicksStats"/>
         <location type="meta" path="/projects/falcon/clicksMetaData"/>
     </locations>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/docs/src/site/twiki/EntitySpecification.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/EntitySpecification.twiki b/docs/src/site/twiki/EntitySpecification.twiki
index 9f9e210..faad305 100644
--- a/docs/src/site/twiki/EntitySpecification.twiki
+++ b/docs/src/site/twiki/EntitySpecification.twiki
@@ -334,6 +334,9 @@ The snapshot layout creates a snapshot of the data on HDFS using the feed's loca
 to specify the projection columns. Feed import from database underneath uses sqoop to achieve the task. Any advanced
 Sqoop options can be specified via the arguments.
 
+The feed's data storage location should include some combination of time partitions if an import policy is associated with it.
+Please see ImportExport documentation for more details.
+
 ---+++ Late Arrival
 
 <verbatim>

http://git-wip-us.apache.org/repos/asf/falcon/blob/c1d37bfa/docs/src/site/twiki/ImportExport.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/ImportExport.twiki b/docs/src/site/twiki/ImportExport.twiki
index b0ce7ff..2fcb42b 100644
--- a/docs/src/site/twiki/ImportExport.twiki
+++ b/docs/src/site/twiki/ImportExport.twiki
@@ -108,6 +108,9 @@ where {lib-dir} value varies in oozie deployments.
       since the frequency of the Feed is hour(1) and the Feed instances are deleted after 90 days because of the
       retention policy.
 
+      The feed's data location should have some combination of time partitions (like ${YEAR}, ${MONTH}, ${DAY},
+      ${HOUR}, ${MINUTE} etc) if an import or export policy is associated with the feed.
+
 
       <verbatim>