You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by mo...@apache.org on 2012/12/22 00:43:57 UTC

svn commit: r1425199 - in /oozie/branches/hcat-intre/examples/src/main/apps/hcatalog: ./ README coordinator.xml id.pig job.properties workflow.xml

Author: mona
Date: Fri Dec 21 23:43:57 2012
New Revision: 1425199

URL: http://svn.apache.org/viewvc?rev=1425199&view=rev
Log:
adding hcatalog coordinator example directory

Added:
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties
    oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml

Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README Fri Dec 21 23:43:57 2012
@@ -0,0 +1,20 @@
+Running the coordinator example with HCatalog (End-to-end test)
+
+1. Set up services - Hive with JMS (e.g. ActiveMQ Server), HCatalog, a database (e.g. MySQL), and of course Hadoop
+2. This example points to the HCatalog server at localhost:11002 (hcatNode=hcat://localhost:11002, i.e. hive.metastore.uris=thrift://localhost:11002). Change hcatNode in job.properties if required
+3. Create 2 tables 'invites' (input) and 'oozie' (output) with this structure: "create table invites (foo INT, bar INT) partitioned by (ds STRING, region STRING)"
+4. Oozie distro should be built this way $> bin/mkdistro.sh -Dhcatalog.version=0.4.1 -DskipTests
+5. The 'libext' dir used by oozie-setup should contain the following list of JARs
+   jackson-mapper-asl-1.8.8.jar
+   jackson-core-asl-1.8.8.jar
+   hive-common-0.9.1.2.jar
+   hive-metastore-0.9.1.2.jar
+   hive-exec-0.9.1.2.jar
+   libfb303.jar
+   jdo2-api-2.3-ec.jar
+   datanucleus-core-2.0.3.jar
+   (Note) hcatalog JARs will be automatically injected
+6. Upload this application directory to HDFS
+7. Run Oozie job using the job.properties. Coordinator actions will be in WAITING
+8. Make input dependencies available through the HCat client with "alter table invites add partition (ds='2010-01-01', region='usa')". This event will start the workflows with the pig action
+9. The first workflow will SUCCEED as expected; the second one will fail with a 'partition already exists' error, because both workflows store into the same output partition. Disregard this failure: the example is still working as expected.

Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml Fri Dec 21 23:43:57 2012
@@ -0,0 +1,73 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<coordinator-app name="cron-coord" frequency="${coord:minutes(10)}" start="${start}" end="${end}" timezone="UTC"
+                 xmlns="uri:oozie:coordinator:0.2">
+    <datasets> <!-- Both URI templates are assembled from job.properties values; raw-logs also uses the instance date. -->
+        <dataset name="raw-logs" frequency="${coord:minutes(20)}" initial-instance="2010-01-01T00:00Z" timezone="UTC">
+            <uri-template>${hcatNode}/${db}/${table}/ds=${YEAR}-${MONTH}-${DAY};region=${region}</uri-template>
+            <done-flag/>
+        </dataset>
+        <dataset name="processed-logs" frequency="${coord:minutes(20)}" initial-instance="2010-01-01T00:00Z" timezone="UTC">
+            <uri-template>${hcatNode}/${db}/${outputtable}/ds=${dataOut};region=${region}</uri-template>
+            <done-flag/>
+        </dataset>
+    </datasets>
+    <!-- Each action depends on the current raw-logs instance and declares the current processed-logs instance as its output. -->
+    <input-events>
+        <data-in name="input" dataset="raw-logs">
+            <instance>${coord:current(0)}</instance>
+        </data-in>
+    </input-events>
+    <output-events>
+        <data-out name="output" dataset="processed-logs">
+            <instance>${coord:current(0)}</instance>
+        </data-out>
+    </output-events>
+    <!-- Start the workflow at workflowAppUri; the properties below carry values computed by the coord: EL functions. -->
+    <action>
+        <workflow>
+            <app-path>${workflowAppUri}</app-path>
+            <configuration>
+                <property>
+                    <name>DB</name>
+                    <value>${coord:database('input', 'input')}</value>
+                </property>
+                <property>
+                    <name>TABLE</name>
+                    <value>${coord:table('input', 'input')}</value>
+                </property>
+                <property>
+                    <name>FILTER</name>
+                    <value>${coord:dataInPartitionFilter('input')}</value>
+                </property>
+                <property>
+                    <name>OUTPUT_PARTITION</name>
+                    <value>${coord:dataOutPartition('output')}</value>
+                </property>
+                <property>
+                    <name>OUTPUT_DB</name>
+                    <value>${coord:database('output', 'output')}</value>
+                </property>
+                <property>
+                    <name>OUTPUT_TABLE</name>
+                    <value>${coord:table('output', 'output')}</value>
+                </property>
+            </configuration>
+        </workflow>
+    </action>
+</coordinator-app>

Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig Fri Dec 21 23:43:57 2012
@@ -0,0 +1,4 @@
+raw_rows = LOAD '$DB.$TABLE' USING org.apache.hcatalog.pig.HCatLoader(); -- read the input table through HCatalog
+matching = FILTER raw_rows BY $FILTER; -- keep only rows matching the partition filter passed in by the workflow
+projected = FOREACH matching GENERATE foo, bar; -- keep the foo and bar columns
+STORE projected INTO '$OUTPUT_DB.$OUTPUT_TABLE' USING org.apache.hcatalog.pig.HCatStorer('$OUTPUT_PARTITION'); -- write to the output table partition

Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties Fri Dec 21 23:43:57 2012
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Cluster endpoints, queue, and the HDFS locations of the coordinator and workflow applications
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+oozie.coord.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/hcatalog
+workflowAppUri=${nameNode}/user/${user.name}/${examplesRoot}/apps/hcatalog
+# Coordinator start/end times, then the HCatalog server, tables, and partition values used in coordinator.xml
+start=2010-01-01T01:00Z
+end=2010-01-01T01:20Z
+hcatNode=hcat://localhost:11002
+db=default
+table=invites
+outputtable=oozie
+region=usa
+dataOut=2011-01-01

Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml Fri Dec 21 23:43:57 2012
@@ -0,0 +1,50 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<workflow-app xmlns="uri:oozie:workflow:0.2" name="pig-wf"> <!-- One Pig action: ok goes to "end", error goes to the "fail" kill node. -->
+    <start to="pig-node"/>
+    <action name="pig-node">
+        <pig>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+                <property>
+                    <name>mapred.compress.map.output</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <script>id.pig</script> <!-- The params below supply the placeholders referenced in id.pig. -->
+            <param>DB=${DB}</param>
+            <param>TABLE=${TABLE}</param>
+            <param>FILTER=${FILTER}</param>
+            <param>OUTPUT_DB=${OUTPUT_DB}</param>
+            <param>OUTPUT_TABLE=${OUTPUT_TABLE}</param>
+            <param>OUTPUT_PARTITION=${OUTPUT_PARTITION}</param>
+            <file>lib/hive-site.xml</file> <!-- NOTE(review): hive-site.xml is not among the files added with this example; presumably it must be placed under lib/ before upload. Confirm. -->
+        </pig>
+        <ok to="end"/>
+        <error to="fail"/>
+    </action>
+    <kill name="fail">
+        <message>Pig failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <end name="end"/>
+</workflow-app>