You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by mo...@apache.org on 2012/12/22 00:43:57 UTC
svn commit: r1425199 - in
/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog: ./ README
coordinator.xml id.pig job.properties workflow.xml
Author: mona
Date: Fri Dec 21 23:43:57 2012
New Revision: 1425199
URL: http://svn.apache.org/viewvc?rev=1425199&view=rev
Log:
adding hcatalog coordinator example directory
Added:
oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/
oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README
oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml
oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig
oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties
oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml
Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/README Fri Dec 21 23:43:57 2012
@@ -0,0 +1,20 @@
+Running the coordinator example with HCatalog (End-to-end test)
+
+1. Set up services - Hive with JMS (e.g. ActiveMQ Server), HCatalog, a database (e.g. MySQL), and of course Hadoop
+2. This example points to hive.metastore.uris=thrift://localhost:11002. Change it in job.properties if required
+3. Create 2 tables 'invites' (input) and 'oozie' (output) with this structure: "create table invites (foo INT, bar INT) partitioned by (ds STRING, region STRING)"
+4. Oozie distro should be built this way $> bin/mkdistro.sh -Dhcatalog.version=0.4.1 -DskipTests
+5. The 'libext' dir used by oozie-setup should contain the following JARs:
+ jackson-mapper-asl-1.8.8.jar
+ jackson-core-asl-1.8.8.jar
+ hive-common-0.9.1.2.jar
+ hive-metastore-0.9.1.2.jar
+ hive-exec-0.9.1.2.jar
+ libfb303.jar
+ jdo2-api-2.3-ec.jar
+ datanucleus-core-2.0.3.jar
+ (Note) hcatalog JARs will be automatically injected
+6. Upload this application directory to HDFS
+7. Run Oozie job using the job.properties. Coordinator actions will be in WAITING
+8. Make the input dependencies available through the HCat client by running "alter table invites add partition (ds='2010-01-01', region='usa')". This event will start the workflows with the pig action
+9. The first workflow will SUCCEED as expected; however, the second one will fail with a 'partition already exists' error. Disregard this failure: the example is still working as expected.
Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/coordinator.xml Fri Dec 21 23:43:57 2012
@@ -0,0 +1,73 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<coordinator-app name="cron-coord" frequency="${coord:minutes(10)}" start="${start}" end="${end}" timezone="UTC"
+ xmlns="uri:oozie:coordinator:0.2"> <!-- Coordinator frequency is 10 minutes; start and end are supplied by job.properties -->
+ <datasets> <!-- One input and one output dataset; both URIs start with the hcatNode property -->
+ <dataset name="raw-logs" frequency="${coord:minutes(20)}" initial-instance="2010-01-01T00:00Z" timezone="UTC"> <!-- Input dataset, referenced by the data-in event "input"; 20-minute frequency -->
+ <uri-template>${hcatNode}/${db}/${table}/ds=${YEAR}-${MONTH}-${DAY};region=${region}</uri-template> <!-- Partition keys: ds built from the instance date, region from job.properties -->
+ <done-flag></done-flag> <!-- Intentionally empty: no done-flag file name is configured -->
+ </dataset>
+ <dataset name="processed-logs" frequency="${coord:minutes(20)}" initial-instance="2010-01-01T00:00Z" timezone="UTC"> <!-- Output dataset, referenced by the data-out event "output" -->
+ <uri-template>${hcatNode}/${db}/${outputtable}/ds=${dataOut};region=${region}</uri-template> <!-- ds here is the fixed dataOut property, not the instance date -->
+ <done-flag></done-flag> <!-- Intentionally empty, same as the input dataset -->
+ </dataset>
+ </datasets>
+
+ <input-events> <!-- Input dependency of each coordinator action -->
+ <data-in name="input" dataset="raw-logs">
+ <instance>${coord:current(0)}</instance> <!-- NOTE(review): with a 10-minute coordinator and 20-minute datasets, consecutive actions presumably resolve to the same instance; this looks like the cause of the second-workflow failure described in the README. Confirm. -->
+ </data-in>
+ </input-events>
+ <output-events> <!-- Output partition each action is expected to produce -->
+ <data-out name="output" dataset="processed-logs">
+ <instance>${coord:current(0)}</instance>
+ </data-out>
+ </output-events>
+
+ <action>
+ <workflow>
+ <app-path>${workflowAppUri}</app-path> <!-- Workflow location, set in job.properties -->
+ <configuration> <!-- Properties handed to the workflow; workflow.xml forwards each one to id.pig as a param -->
+ <property> <!-- DB: database part of the table name loaded by id.pig -->
+ <name>DB</name>
+ <value>${coord:database('input', 'input')}</value>
+ </property>
+ <property> <!-- TABLE: table part of the table name loaded by id.pig -->
+ <name>TABLE</name>
+ <value>${coord:table('input', 'input')}</value>
+ </property>
+ <property> <!-- FILTER: expression id.pig applies in its FILTER ... BY statement -->
+ <name>FILTER</name>
+ <value>${coord:dataInPartitionFilter('input')}</value>
+ </property>
+ <property> <!-- OUTPUT_PARTITION: argument id.pig passes to HCatStorer -->
+ <name>OUTPUT_PARTITION</name>
+ <value>${coord:dataOutPartition('output')}</value>
+ </property>
+ <property> <!-- OUTPUT_DB: database part of the table name id.pig stores into -->
+ <name>OUTPUT_DB</name>
+ <value>${coord:database('output', 'output')}</value>
+ </property>
+ <property> <!-- OUTPUT_TABLE: table part of the table name id.pig stores into -->
+ <name>OUTPUT_TABLE</name>
+ <value>${coord:table('output', 'output')}</value>
+ </property>
+ </configuration>
+ </workflow>
+ </action>
+</coordinator-app>
Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/id.pig Fri Dec 21 23:43:57 2012
@@ -0,0 +1,4 @@
+src_rows = LOAD '$DB.$TABLE' USING org.apache.hcatalog.pig.HCatLoader(); -- read the input table through HCatalog
+kept_rows = FILTER src_rows BY $FILTER; -- keep only rows matching the filter param supplied by the workflow
+out_rows = FOREACH kept_rows GENERATE foo, bar; -- project the foo and bar columns
+STORE out_rows INTO '$OUTPUT_DB.$OUTPUT_TABLE' USING org.apache.hcatalog.pig.HCatStorer('$OUTPUT_PARTITION'); -- write into the given output partition
Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/job.properties Fri Dec 21 23:43:57 2012
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+# Above: cluster settings read by workflow.xml. Below: values referenced from coordinator.xml (HCatalog endpoint, tables, partition values, time window).
+oozie.coord.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/hcatalog
+hcatNode=hcat://localhost:11002
+db=default
+table=invites
+start=2010-01-01T01:00Z
+end=2010-01-01T01:20Z
+workflowAppUri=${nameNode}/user/${user.name}/${examplesRoot}/apps/hcatalog
+dataOut=2011-01-01
+outputtable=oozie
+region=usa
Added: oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml
URL: http://svn.apache.org/viewvc/oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml?rev=1425199&view=auto
==============================================================================
--- oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml (added)
+++ oozie/branches/hcat-intre/examples/src/main/apps/hcatalog/workflow.xml Fri Dec 21 23:43:57 2012
@@ -0,0 +1,50 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<workflow-app xmlns="uri:oozie:workflow:0.2" name="pig-wf"> <!-- Single-action workflow: run the Pig script, then go to "end" on success or "fail" on error -->
+ <start to="pig-node"/>
+ <action name="pig-node">
+ <pig>
+ <job-tracker>${jobTracker}</job-tracker> <!-- jobTracker, nameNode and queueName are defined in job.properties -->
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- Queue the job is submitted to -->
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property> <!-- Turns on compression of map output -->
+ <name>mapred.compress.map.output</name>
+ <value>true</value>
+ </property>
+ </configuration>
+ <script>id.pig</script> <!-- id.pig is added to the same application directory by this commit -->
+ <param>DB=${DB}</param> <!-- The six params forward the properties set in the coordinator action configuration -->
+ <param>TABLE=${TABLE}</param>
+ <param>FILTER=${FILTER}</param>
+ <param>OUTPUT_DB=${OUTPUT_DB}</param>
+ <param>OUTPUT_TABLE=${OUTPUT_TABLE}</param>
+ <param>OUTPUT_PARTITION=${OUTPUT_PARTITION}</param>
+ <file>lib/hive-site.xml</file> <!-- NOTE(review): hive-site.xml is not among the files added by this commit; presumably it must be placed under lib/ before uploading. Confirm. -->
+ </pig>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <kill name="fail"> <!-- Terminates the workflow and reports the error message of the last failed node -->
+ <message>Pig failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+ </kill>
+ <end name="end"/>
+</workflow-app>