You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by sr...@apache.org on 2013/07/09 07:16:07 UTC

git commit: FALCON-29 Add ability to tag/classify data sets and processes to enable discovery. Contributed by Venkatesh Seetharam

Updated Branches:
  refs/heads/master f2a17306a -> d7e2be9af


FALCON-29 Add ability to tag/classify data sets and processes to enable discovery. Contributed by Venkatesh Seetharam


Project: http://git-wip-us.apache.org/repos/asf/incubator-falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-falcon/commit/d7e2be9a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-falcon/tree/d7e2be9a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-falcon/diff/d7e2be9a

Branch: refs/heads/master
Commit: d7e2be9afa2a5dc96acd1ec9e325f39c6b2f17f7
Parents: f2a1730
Author: srikanth.sundarrajan <sr...@inmobi.com>
Authored: Tue Jul 9 10:43:31 2013 +0530
Committer: srikanth.sundarrajan <sr...@inmobi.com>
Committed: Tue Jul 9 10:43:31 2013 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 +
 client/src/main/resources/cluster-0.1.xsd       | 15 +++++
 client/src/main/resources/feed-0.1.xsd          | 20 ++++++-
 client/src/main/resources/process-0.1.xsd       | 52 +++++++----------
 .../entity/parser/ClusterEntityParserTest.java  | 25 ++++++++
 .../entity/parser/FeedEntityParserTest.java     | 25 ++++++--
 .../resources/config/cluster/cluster-0.1.xml    |  4 +-
 .../resources/config/feed/feed-tags-0.1.xml     | 60 ++++++++++++++++++++
 8 files changed, 164 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 45e0a7e..9f07f31 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -8,6 +8,9 @@ Trunk (Unreleased)
 
   IMPROVEMENTS
 
+    FALCON-29 Add ability to tag/classify data sets and processes to enable 
+    discovery (Venkatesh Seetharam via Srikanth Sundarrajan)
+
     FALCON-31 File Installation-steps.txt contains old path to repository (Jarek 
     Jarcec Cecho via Srikanth Sundarrajan)
 

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/client/src/main/resources/cluster-0.1.xsd
----------------------------------------------------------------------
diff --git a/client/src/main/resources/cluster-0.1.xsd b/client/src/main/resources/cluster-0.1.xsd
index b8643a7..a7b2750 100644
--- a/client/src/main/resources/cluster-0.1.xsd
+++ b/client/src/main/resources/cluster-0.1.xsd
@@ -57,6 +57,16 @@
             </xs:documentation>
         </xs:annotation>
         <xs:sequence>
+            <xs:element type="KEY_VALUE_PAIR" name="tags" minOccurs="0">
+                <xs:annotation>
+                    <xs:documentation>
+                        tags: a cluster specifies an optional list of tags,
+                        given as comma separated key=value pairs,
+                        which is used for classification of clusters.
+                        Example: consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting
+                    </xs:documentation>
+                </xs:annotation>
+            </xs:element>
             <xs:element type="interfaces" name="interfaces"/>
             <xs:element type="locations" name="locations"/>
             <xs:element type="properties" name="properties" minOccurs="0"/>
@@ -171,4 +181,9 @@
             <xs:pattern value="(([a-zA-Z]([\-a-zA-Z0-9])*){1,39})"/>
         </xs:restriction>
     </xs:simpleType>
+    <xs:simpleType name="KEY_VALUE_PAIR">
+        <xs:restriction base="xs:string">
+            <xs:pattern value="(\w+=[^,]+)?([,]?[ ]*[\w]+=[^,]+)*"/>
+        </xs:restriction>
+    </xs:simpleType>
 </xs:schema>

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/client/src/main/resources/feed-0.1.xsd
----------------------------------------------------------------------
diff --git a/client/src/main/resources/feed-0.1.xsd b/client/src/main/resources/feed-0.1.xsd
index a2b73fe..8390565 100644
--- a/client/src/main/resources/feed-0.1.xsd
+++ b/client/src/main/resources/feed-0.1.xsd
@@ -57,8 +57,9 @@
             <xs:documentation>
                 name: A feed should have a unique name and this name is referenced
                 by processes as input or output feed.
-                groups: a feed specifies a list of comma
-                separated groups,
+                tags: a feed specifies an optional list of comma separated tags
+                which is used for classification of data sets.
+                groups: a feed specifies a list of comma separated groups,
                 a group is a logical grouping of feeds and a group is said to be
                 available if all the feeds belonging to a group are available.
                 The frequency of all
@@ -77,6 +78,16 @@
             </xs:documentation>
         </xs:annotation>
         <xs:sequence>
+            <xs:element type="KEY_VALUE_PAIR" name="tags" minOccurs="0">
+                <xs:annotation>
+                    <xs:documentation>
+                        tags: a feed specifies an optional list of tags,
+                        given as comma separated key=value pairs,
+                        which is used for classification of data sets.
+                        Example: consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting
+                    </xs:documentation>
+                </xs:annotation>
+            </xs:element>
             <xs:element type="partitions" name="partitions" minOccurs="0"/>
             <xs:element type="group-type" name="groups" minOccurs="0"/>
             <xs:element type="xs:string" name="availabilityFlag" minOccurs="0"/>
@@ -339,4 +350,9 @@
             <xs:pattern value="(\w+(,\w+)*)"/>
         </xs:restriction>
     </xs:simpleType>
+    <xs:simpleType name="KEY_VALUE_PAIR">
+        <xs:restriction base="xs:string">
+            <xs:pattern value="(\w+=[^,]+)?([,]?[ ]*[\w]+=[^,]+)*"/>
+        </xs:restriction>
+    </xs:simpleType>
 </xs:schema>

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/client/src/main/resources/process-0.1.xsd
----------------------------------------------------------------------
diff --git a/client/src/main/resources/process-0.1.xsd b/client/src/main/resources/process-0.1.xsd
index 1368d85..529d5c8 100644
--- a/client/src/main/resources/process-0.1.xsd
+++ b/client/src/main/resources/process-0.1.xsd
@@ -23,23 +23,17 @@
         <xs:documentation>
             Licensed to the Apache Software Foundation (ASF) under one or more
             contributor license agreements. See the NOTICE file distributed with
-            this work for
-            additional information regarding copyright ownership.
+            this work for additional information regarding copyright ownership.
             The ASF licenses this file to You under the Apache License, Version
-            2.0
-            (the "License"); you may not use this file
-            except in compliance with
+            2.0 (the "License"); you may not use this file except in compliance with
             the License. You may obtain a copy of the License at
 
             http://www.apache.org/licenses/LICENSE-2.0
 
-            Unless required by applicable law or agreed to in
-            writing, software
+            Unless required by applicable law or agreed to in writing, software
             distributed under the License is distributed on an "AS IS" BASIS,
-            WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-            implied.
-            See the License
-            for the specific language governing permissions and
+            WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+            See the License for the specific language governing permissions and
             limitations under the License.
         </xs:documentation>
         <xs:appinfo>
@@ -52,26 +46,6 @@
     <xs:element name="process" type="process">
         <xs:annotation>
             <xs:documentation>
-                Licensed to the Apache Software Foundation (ASF) under one
-                or more contributor license agreements. See the NOTICE file
-                distributed with this work for
-                additional information
-                regarding copyright ownership. The ASF licenses this file
-                to you under the Apache License, Version 2.0 (the
-                "License"); you may not use this
-                file except in compliance
-                with the License. You may obtain a copy of the License at
-
-                http://www.apache.org/licenses/LICENSE-2.0
-
-                Unless required by applicable law or
-                agreed to in writing, software
-                distributed under the License is distributed on an "AS IS" BASIS,
-                WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-                implied.
-                See the License for the specific language governing permissions and
-                limitations under the License.
-
                 A process defines configuration for the workflow job like
                 frequency of the workflow, inputs and outputs for the workflow, how to
                 handle workflow failures, how to handle data that comes late and so on.
@@ -80,6 +54,16 @@
     </xs:element>
     <xs:complexType name="process">
         <xs:sequence>
+            <xs:element type="KEY_VALUE_PAIR" name="tags" minOccurs="0">
+                <xs:annotation>
+                    <xs:documentation>
+                        tags: a process specifies an optional list of tags,
+                        given as comma separated key=value pairs,
+                        which is used for classification of processes.
+                        Example: consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting
+                    </xs:documentation>
+                </xs:annotation>
+            </xs:element>
             <xs:element type="clusters" name="clusters">
                 <xs:annotation>
                     <xs:documentation>Defines the clusters where the workflow should run
@@ -358,4 +342,10 @@
         <xs:attribute type="xs:string" name="name" use="required"/>
         <xs:attribute type="xs:string" name="value" use="required"/>
     </xs:complexType>
+
+    <xs:simpleType name="KEY_VALUE_PAIR">
+        <xs:restriction base="xs:string">
+            <xs:pattern value="(\w+=[^,]+)?([,]?[ ]*[\w]+=[^,]+)*"/>
+        </xs:restriction>
+    </xs:simpleType>
 </xs:schema>

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/common/src/test/java/org/apache/falcon/entity/parser/ClusterEntityParserTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/falcon/entity/parser/ClusterEntityParserTest.java b/common/src/test/java/org/apache/falcon/entity/parser/ClusterEntityParserTest.java
index 1b34141..5c25ea4 100644
--- a/common/src/test/java/org/apache/falcon/entity/parser/ClusterEntityParserTest.java
+++ b/common/src/test/java/org/apache/falcon/entity/parser/ClusterEntityParserTest.java
@@ -83,6 +83,31 @@ public class ClusterEntityParserTest extends AbstractTestBase {
         parser.parseAndValidate(stringWriter.toString());
     }
 
+    /**
+     * A positive test for validating tags key value pair regex: key=value, key=value.
+     * @throws FalconException
+     */
+    @Test
+    public void testClusterTags() throws FalconException {
+        InputStream stream = this.getClass().getResourceAsStream(CLUSTER_XML);
+        Cluster cluster = parser.parse(stream);
+
+        final String tags = cluster.getTags();
+        Assert.assertEquals("consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting", tags);
+
+        final String[] keys = {"consumer", "owner", "department", };
+        final String[] values = {"consumer@xyz.com", "producer@xyz.com", "forecasting", };
+
+        final String[] pairs = tags.split(",");
+        Assert.assertEquals(3, pairs.length);
+        for (int i = 0; i < pairs.length; i++) {
+            String pair = pairs[i].trim();
+            String[] parts = pair.split("=");
+            Assert.assertEquals(keys[i], parts[0]);
+            Assert.assertEquals(values[i], parts[1]);
+        }
+    }
+
     @BeforeClass
     public void init() throws Exception {
         this.dfsCluster = EmbeddedCluster.newCluster("testCluster", false);

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java b/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
index 1d24e6c..4cc4a0e 100644
--- a/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
+++ b/common/src/test/java/org/apache/falcon/entity/parser/FeedEntityParserTest.java
@@ -36,6 +36,7 @@ import javax.xml.bind.JAXBException;
 import javax.xml.bind.Marshaller;
 import javax.xml.bind.Unmarshaller;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.StringWriter;
 
 import static org.testng.AssertJUnit.assertEquals;
@@ -66,7 +67,7 @@ public class FeedEntityParserTest extends AbstractTestBase {
         cluster.setName("backupCluster");
         store.publish(EntityType.CLUSTER, cluster);
 
-        modifiableFeed = (Feed) parser.parseAndValidate(this.getClass()
+        modifiableFeed = parser.parseAndValidate(this.getClass()
                 .getResourceAsStream(FEED_XML));
     }
 
@@ -137,9 +138,8 @@ public class FeedEntityParserTest extends AbstractTestBase {
 
     @Test(expectedExceptions = ValidationException.class)
     public void applyValidationInvalidFeed() throws Exception {
-        Feed feed = (Feed) parser
-                .parseAndValidate(ProcessEntityParserTest.class
-                        .getResourceAsStream(FEED_XML));
+        Feed feed = parser.parseAndValidate(ProcessEntityParserTest.class
+                .getResourceAsStream(FEED_XML));
         feed.getClusters().getClusters().get(0).setName("invalid cluster");
         parser.validate(feed);
     }
@@ -147,7 +147,7 @@ public class FeedEntityParserTest extends AbstractTestBase {
 
     @Test
     public void testPartitionExpression() throws FalconException {
-        Feed feed = (Feed) parser.parseAndValidate(ProcessEntityParserTest.class
+        Feed feed = parser.parseAndValidate(ProcessEntityParserTest.class
                 .getResourceAsStream(FEED_XML));
 
         //When there are more than 1 src clusters, there should be partition expression
@@ -429,4 +429,19 @@ public class FeedEntityParserTest extends AbstractTestBase {
 
     }
 
+    /**
+     * A negative test for validating tags key value pair regex: key=value, key=value.
+     * @throws FalconException
+     */
+    @Test
+    public void testFeedTags() throws FalconException {
+        try {
+            InputStream stream = this.getClass().getResourceAsStream("/config/feed/feed-tags-0.1.xml");
+            parser.parse(stream);
+            Assert.fail("org.xml.sax.SAXParseException should have been thrown.");
+        } catch (FalconException e) {
+            Assert.assertEquals(javax.xml.bind.UnmarshalException.class, e.getCause().getClass());
+            Assert.assertEquals(org.xml.sax.SAXParseException.class, e.getCause().getCause().getClass());
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/common/src/test/resources/config/cluster/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/cluster/cluster-0.1.xml b/common/src/test/resources/config/cluster/cluster-0.1.xml
index cf53398..fd6e06e 100644
--- a/common/src/test/resources/config/cluster/cluster-0.1.xml
+++ b/common/src/test/resources/config/cluster/cluster-0.1.xml
@@ -17,8 +17,8 @@
   limitations under the License.
   -->
 
-<cluster colo="default" description="" name="testCluster" xmlns="uri:falcon:cluster:0.1"
-        >
+<cluster colo="default" description="" name="testCluster" xmlns="uri:falcon:cluster:0.1">
+    <tags>consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting</tags>
     <interfaces>
         <interface type="readonly" endpoint="hftp://localhost:50010"
                    version="0.20.2"/>

http://git-wip-us.apache.org/repos/asf/incubator-falcon/blob/d7e2be9a/common/src/test/resources/config/feed/feed-tags-0.1.xml
----------------------------------------------------------------------
diff --git a/common/src/test/resources/config/feed/feed-tags-0.1.xml b/common/src/test/resources/config/feed/feed-tags-0.1.xml
new file mode 100644
index 0000000..8429f61
--- /dev/null
+++ b/common/src/test/resources/config/feed/feed-tags-0.1.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="clicks log" name="tags" xmlns="uri:falcon:feed:0.1">
+    <tags>   consumer = consumer@xyz.com, owner = producer@xyz.com, =forecasting   </tags>
+    <partitions>
+        <partition name="fraud"/>
+        <partition name="good"/>
+    </partitions>
+
+    <groups>online,bi</groups>
+    <availabilityFlag>_SUCCESS</availabilityFlag>
+
+    <frequency>hours(1)</frequency>
+    <timezone>UTC</timezone>
+
+    <late-arrival cut-off="hours(6)"/>
+
+    <clusters>
+        <cluster name="testCluster" type="source">
+            <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
+            <retention limit="hours(48)" action="delete"/>
+            <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+            <locations>
+                <location type="data" path="/projects/falcon/clicks"/>
+                <location type="stats" path="/projects/falcon/clicksStats"/>
+                <location type="meta" path="/projects/falcon/clicksMetaData"/>
+            </locations>
+        </cluster>
+        <cluster name="backupCluster" type="target">
+            <validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
+            <retention limit="hours(6)" action="archive"/>
+            <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+        </cluster>
+    </clusters>
+
+    <locations>
+        <location type="data" path="/projects/falcon/clicks"/>
+        <location type="stats" path="/projects/falcon/clicksStats"/>
+        <location type="meta" path="/projects/falcon/clicksMetaData"/>
+    </locations>
+
+    <ACL owner="testuser" group="group" permission="0x755"/>
+    <schema location="/schema/clicks" provider="protobuf"/>
+</feed>