Posted to commits@oozie.apache.org by tu...@apache.org on 2011/11/30 00:11:42 UTC

svn commit: r1208126 [2/2] - in /incubator/oozie/trunk: ./ client/src/main/java/org/apache/oozie/cli/ client/src/main/resources/ core/ core/src/main/conf/ core/src/main/java/org/apache/oozie/ core/src/main/java/org/apache/oozie/action/hadoop/ core/src/...

Added: incubator/oozie/trunk/docs/src/site/twiki/DG_HiveActionExtension.twiki
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/docs/src/site/twiki/DG_HiveActionExtension.twiki?rev=1208126&view=auto
==============================================================================
--- incubator/oozie/trunk/docs/src/site/twiki/DG_HiveActionExtension.twiki (added)
+++ incubator/oozie/trunk/docs/src/site/twiki/DG_HiveActionExtension.twiki Tue Nov 29 23:11:37 2011
@@ -0,0 +1,221 @@
+<noautolink>
+
+[[index][::Go back to Oozie Documentation Index::]]
+
+-----
+
+---+!! Oozie Hive Action Extension
+
+%TOC%
+
+#HiveAction
+---++ Hive Action
+
+The =hive= action runs a Hive job.
+
+The workflow job will wait until the Hive job completes before
+continuing to the next action.
+
+To run the Hive job, you have to configure the =hive= action with the
+=job-tracker=, =name-node= and Hive =script= elements as
+well as the necessary parameters and configuration.
+
+A =hive= action can be configured to create or delete HDFS directories
+before starting the Hive job.
+
+Hive configuration can be specified with a file, using the =job-xml=
+element, and inline, using the =configuration= elements.
+
+Oozie EL expressions can be used in the inline configuration. Property
+values specified in the =configuration= element override values specified
+in the =job-xml= file.
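+
+For example, in the hypothetical fragment below, the inline value of
+=hive.exec.compress.output= overrides the value defined for the same property
+in the =hive-config.xml= file (both names are illustrative):
+
+<verbatim>
+<job-xml>hive-config.xml</job-xml>
+<configuration>
+    <property>
+        <name>hive.exec.compress.output</name>
+        <value>true</value>
+    </property>
+</configuration>
+</verbatim>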
+
+Note that Hadoop =mapred.job.tracker= and =fs.default.name= properties
+must not be present in the inline configuration.
+
+As with Hadoop =map-reduce= jobs, it is possible to add files and
+archives in order to make them available to the Hive job. Refer to the
+[[WorkflowFunctionalSpec#FilesAchives][Adding Files and Archives for the Job]]
+section for more information about this feature.
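+
+For example, a jar with Hive UDFs could be made available to the Hive job with
+a =file= element (the jar path below is illustrative; a relative path is
+resolved within the workflow application directory):
+
+<verbatim>
+<file>lib/my-udfs.jar</file>
+</verbatim>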
+
+The Oozie Hive action supports Hive scripts with parameter variables; their
+syntax is =${VARIABLES}=.
+
+*Syntax:*
+
+<verbatim>
+<workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.1">
+    ...
+    <action name="[NODE-NAME]">
+        <hive xmlns="uri:oozie:hive-action:0.2">
+            <job-tracker>[JOB-TRACKER]</job-tracker>
+            <name-node>[NAME-NODE]</name-node>
+            <prepare>
+               <delete path="[PATH]"/>
+               ...
+               <mkdir path="[PATH]"/>
+               ...
+            </prepare>
+            <job-xml>[HIVE SETTINGS FILE]</job-xml>
+            <configuration>
+                <property>
+                    <name>[PROPERTY-NAME]</name>
+                    <value>[PROPERTY-VALUE]</value>
+                </property>
+                ...
+            </configuration>
+            <script>[HIVE-SCRIPT]</script>
+            <param>[PARAM-VALUE]</param>
+                ...
+            <param>[PARAM-VALUE]</param>
+            <file>[FILE-PATH]</file>
+            ...
+            <archive>[FILE-PATH]</archive>
+            ...
+        </hive>
+        <ok to="[NODE-NAME]"/>
+        <error to="[NODE-NAME]"/>
+    </action>
+    ...
+</workflow-app>
+</verbatim>
+
+The =prepare= element, if present, indicates a list of paths to delete
+or create before starting the job. Specified paths must start with =hdfs://HOST:PORT=.
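+
+For example, a fully qualified =delete= path could look as follows (host, port
+and path are illustrative):
+
+<verbatim>
+<prepare>
+    <delete path="hdfs://foo:9000/usr/tucu/temp-data"/>
+</prepare>
+</verbatim>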
+
+The =job-xml= element, if present, specifies a file containing configuration
+for the Hive job.
+
+The =configuration= element, if present, contains configuration
+properties that are passed to the Hive job.
+
+The =script= element must contain the path of the Hive script to
+execute. The Hive script can be templatized with variables of the form
+=${VARIABLE}=. The values of these variables can then be specified
+using the =param= elements.
+
+The =param= elements, if present, contain the parameters to be passed to
+the Hive script.
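+
+For example, if the Hive script contained a hypothetical query such as
+=SELECT * FROM logs WHERE day='${DATE}';=, the variable could be given a value
+as follows:
+
+<verbatim>
+<script>myscript.q</script>
+<param>DATE=2011-11-29</param>
+</verbatim>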
+
+All the above elements can be parameterized (templatized) using EL
+expressions.
+
+*Example:*
+
+<verbatim>
+<workflow-app name="sample-wf" xmlns="uri:oozie:workflow:0.1">
+    ...
+    <action name="myfirsthivejob">
+        <hive xmlns="uri:oozie:hive-action:0.2">
+            <job-tracker>foo:9001</job-tracker>
+            <name-node>bar:9000</name-node>
+            <prepare>
+                <delete path="${jobOutput}"/>
+            </prepare>
+            <configuration>
+                <property>
+                    <name>mapred.compress.map.output</name>
+                    <value>true</value>
+                </property>
+                <property>
+                    <name>oozie.hive.defaults</name>
+                    <value>/usr/foo/hive-0.6-default.xml</value>
+                </property>
+            </configuration>
+            <script>myscript.q</script>
+            <param>InputDir=/home/tucu/input-data</param>
+            <param>OutputDir=${jobOutput}</param>
+        </hive>
+        <ok to="myotherjob"/>
+        <error to="errorcleanup"/>
+    </action>
+    ...
+</workflow-app>
+</verbatim>
+
+---+++ Hive Default and Site Configuration Files
+
+All the properties defined in the =job-xml= file and inline in the =configuration=
+element become the =hive-site.xml= that Hive will use.
+
+Hive (as of Hive 0.6) does not yet include a =hive-default.xml= file; it is the responsibility
+of the user to provide one. When using the Oozie Hive action, the =hive-default.xml= file must be
+copied to HDFS and its location must be set in the =oozie.hive.defaults= property of the Hive
+action =configuration= section. If a relative path is given, the path is resolved within the
+workflow application directory.
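+
+For example, either of the forms below could be used (host, port and paths are
+illustrative):
+
+<verbatim>
+<!-- absolute HDFS path, used as-is -->
+<property>
+    <name>oozie.hive.defaults</name>
+    <value>hdfs://bar:9000/usr/tucu/hive-default.xml</value>
+</property>
+
+<!-- relative path, resolved within the workflow application directory -->
+<property>
+    <name>oozie.hive.defaults</name>
+    <value>my-hive-default.xml</value>
+</property>
+</verbatim>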
+
+*NOTE:* When Hive starts bundling a =hive-default.xml= file within its JARs, Oozie will ignore
+the =hive-default.xml= file specified in the Hive action configuration.
+
+If a =hive-default.xml= file is not specified (or available in the Hive JARs), the Oozie Hive action will fail.
+
+---+++ Hive Action Logging
+
+Hive action logs are redirected to the STDOUT/STDERR of the Oozie Launcher map-reduce job task that runs Hive.
+
+From the Oozie web-console, using the 'Console URL' link in the Hive action pop-up, it is
+possible to navigate to the Oozie Launcher map-reduce job task logs via the Hadoop
+job-tracker web-console.
+
+The logging level of the Hive action can be set in the Hive action configuration using the
+property =oozie.hive.log.level=. The default value is =INFO=.
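+
+For example, to get more verbose logs while debugging a job, the level could be
+set to =DEBUG= in the Hive action =configuration= section:
+
+<verbatim>
+<property>
+    <name>oozie.hive.log.level</name>
+    <value>DEBUG</value>
+</property>
+</verbatim>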
+
+---++ Appendix, Hive XML-Schema
+
+---+++ AE.A Appendix A, Hive XML-Schema
+
+<verbatim>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+           xmlns:hive="uri:oozie:hive-action:0.2" elementFormDefault="qualified"
+           targetNamespace="uri:oozie:hive-action:0.2">
+.
+    <xs:element name="hive" type="hive:ACTION"/>
+.
+    <xs:complexType name="ACTION">
+        <xs:sequence>
+            <xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
+            <xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
+            <xs:element name="prepare" type="hive:PREPARE" minOccurs="0" maxOccurs="1"/>
+            <xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="1"/>
+            <xs:element name="configuration" type="hive:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
+            <xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/>
+            <xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
+            <xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
+            <xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
+        </xs:sequence>
+    </xs:complexType>
+.
+    <xs:complexType name="CONFIGURATION">
+        <xs:sequence>
+            <xs:element name="property" minOccurs="1" maxOccurs="unbounded">
+                <xs:complexType>
+                    <xs:sequence>
+                        <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
+                        <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
+                        <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
+                    </xs:sequence>
+                </xs:complexType>
+            </xs:element>
+        </xs:sequence>
+    </xs:complexType>
+.
+    <xs:complexType name="PREPARE">
+        <xs:sequence>
+            <xs:element name="delete" type="hive:DELETE" minOccurs="0" maxOccurs="unbounded"/>
+            <xs:element name="mkdir" type="hive:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
+        </xs:sequence>
+    </xs:complexType>
+.
+    <xs:complexType name="DELETE">
+        <xs:attribute name="path" type="xs:string" use="required"/>
+    </xs:complexType>
+.
+    <xs:complexType name="MKDIR">
+        <xs:attribute name="path" type="xs:string" use="required"/>
+    </xs:complexType>
+.
+</xs:schema>
+</verbatim>
+
+[[index][::Go back to Oozie Documentation Index::]]
+
+</noautolink>

Modified: incubator/oozie/trunk/docs/src/site/twiki/ENG_Building.twiki
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/docs/src/site/twiki/ENG_Building.twiki?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/docs/src/site/twiki/ENG_Building.twiki (original)
+++ incubator/oozie/trunk/docs/src/site/twiki/ENG_Building.twiki Tue Nov 29 23:11:37 2011
@@ -190,6 +190,15 @@ value is =/tmp=.
 
 *test.exclude.pattern*= : specifies one or more patterns for testcases to exclude, for example =**/Test*Command.java=.
 
+---+++ Testing Hive Action
+
+Because of dependency incompatibilities between Hive 0.9.0 and Pig 0.9.0, a special profile
+must be used to run the Hive testcases, together with the names of the Hive testcases.
+
+<verbatim>
+$ mvn test -DtestHive -Dtest=TestHiveActionExecutor,TestHiveMain
+</verbatim>
+
 ---++ Building an Oozie Distribution
 
 An Oozie distribution bundles an embedded Tomcat server. The Oozie distro module downloads Tomcat TAR.GZ from Apache

Modified: incubator/oozie/trunk/docs/src/site/twiki/index.twiki
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/docs/src/site/twiki/index.twiki?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/docs/src/site/twiki/index.twiki (original)
+++ incubator/oozie/trunk/docs/src/site/twiki/index.twiki Tue Nov 29 23:11:37 2011
@@ -45,13 +45,17 @@ Enough reading already? Follow the steps
    * [[DG_CommandLineTool][Command Line Tool]]
    * [[DG_WorkflowReRun][Workflow Re-runs Explained]]
    * [[DG_UsingHadoopKerberos][Using a Hadoop cluster with Kerberos Authentication]]
-   * [[DG_EmailActionExtension][Email Action]]
 
    * [[DG_CustomActionExecutor][Writing a Custom Action Executor]]
    * [[./client/apidocs/index.html][Oozie Client Javadocs]]
    * [[./core/apidocs/index.html][Oozie Core Javadocs]]
    * [[WebServicesAPI][Oozie Web Services API]]
 
+---+++ Action Extensions
+
+   * [[DG_EmailActionExtension][Email Action]]
+   * [[DG_HiveActionExtension][Hive Action]]
+
 ---++ Administrator Documentation
 
    * [[AG_Install][Oozie Install]]

Modified: incubator/oozie/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/examples/pom.xml?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/examples/pom.xml (original)
+++ incubator/oozie/trunk/examples/pom.xml Tue Nov 29 23:11:37 2011
@@ -79,6 +79,18 @@
            <scope>test</scope>
         </dependency>
 
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+           <groupId>org.apache.derby</groupId>
+           <artifactId>derby</artifactId>
+           <scope>compile</scope>
+        </dependency>
+
     </dependencies>
     <build>
         <plugins>

Added: incubator/oozie/trunk/examples/src/main/apps/hive/job.properties
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/examples/src/main/apps/hive/job.properties?rev=1208126&view=auto
==============================================================================
--- incubator/oozie/trunk/examples/src/main/apps/hive/job.properties (added)
+++ incubator/oozie/trunk/examples/src/main/apps/hive/job.properties Tue Nov 29 23:11:37 2011
@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+nameNode=hdfs://localhost:8020
+jobTracker=localhost:8021
+queueName=default
+examplesRoot=examples
+
+oozie.use.system.libpath=true
+
+oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/hive

Added: incubator/oozie/trunk/examples/src/main/apps/hive/my-hive-default.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/examples/src/main/apps/hive/my-hive-default.xml?rev=1208126&view=auto
==============================================================================
--- incubator/oozie/trunk/examples/src/main/apps/hive/my-hive-default.xml (added)
+++ incubator/oozie/trunk/examples/src/main/apps/hive/my-hive-default.xml Tue Nov 29 23:11:37 2011
@@ -0,0 +1,451 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<configuration>
+
+<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files  -->
+<!-- that are implied by Hadoop setup variables.                                                -->
+<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive    -->
+<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
+<!-- resource).                                                                                 -->
+
+<!-- Hive Execution Parameters -->
+<property>
+  <name>mapred.reduce.tasks</name>
+  <value>-1</value>
+    <description>The default number of reduce tasks per job.  Typically set
+  to a prime close to the number of available hosts.  Ignored when
+  mapred.job.tracker is "local". Hadoop sets this to 1 by default, whereas Hive uses -1 as its default value.
+  By setting this property to -1, Hive will automatically figure out the number of reducers.
+  </description>
+</property>
+
+<property>
+  <name>hive.exec.reducers.bytes.per.reducer</name>
+  <value>1000000000</value>
+  <description>Size per reducer. The default is 1G, i.e. if the input size is 10G, it will use 10 reducers.</description>
+</property>
+
+<property>
+  <name>hive.exec.reducers.max</name>
+  <value>999</value>
+  <description>Maximum number of reducers that will be used. If the value
+  specified in the configuration parameter mapred.reduce.tasks is
+  negative, Hive will use this as the maximum number of reducers when
+  automatically determining the number of reducers.</description>
+</property>
+
+<property>
+  <name>hive.exec.scratchdir</name>
+  <value>/tmp/hive-${user.name}</value>
+  <description>Scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>hive.test.mode</name>
+  <value>false</value>
+  <description>whether hive is running in test mode. If yes, it turns on sampling and prefixes the output tablename</description>
+</property>
+
+<property>
+  <name>hive.test.mode.prefix</name>
+  <value>test_</value>
+  <description>if hive is running in test mode, prefixes the output table by this string</description>
+</property>
+
+<!-- If the input table is not bucketed, the denominator of the tablesample is determined by the parameter below    -->
+<!-- For example, the following query:                                                                              -->
+<!--   INSERT OVERWRITE TABLE dest                                                                                  -->
+<!--   SELECT col1 from src                                                                                         -->
+<!-- would be converted to                                                                                          -->
+<!--   INSERT OVERWRITE TABLE test_dest                                                                             -->
+<!--   SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1))                                             -->
+<property>
+  <name>hive.test.mode.samplefreq</name>
+  <value>32</value>
+  <description>if hive is running in test mode and table is not bucketed, sampling frequency</description>
+</property>
+
+<property>
+  <name>hive.test.mode.nosamplelist</name>
+  <value></value>
+  <description>if hive is running in test mode, don't sample the above comma separated list of tables</description>
+</property>
+
+<property>
+  <name>hive.metastore.local</name>
+  <value>true</value>
+  <description>controls whether to connect to a remote metastore server or open a new metastore server in the Hive Client JVM</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionURL</name>
+  <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+  <description>JDBC connect string for a JDBC metastore</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionDriverName</name>
+  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+  <description>Driver class name for a JDBC metastore</description>
+</property>
+
+<property>
+  <name>javax.jdo.PersistenceManagerFactoryClass</name>
+  <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+  <description>class implementing the jdo persistence</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.DetachAllOnCommit</name>
+  <value>true</value>
+  <description>detaches all objects from session so that they can be used after transaction is committed</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.NonTransactionalRead</name>
+  <value>true</value>
+  <description>reads outside of transactions</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionUserName</name>
+  <value>APP</value>
+  <description>username to use against metastore database</description>
+</property>
+
+<!--<property>-->
+  <!--<name>javax.jdo.option.ConnectionPassword</name>-->
+  <!--<value>mine</value>-->
+  <!--<description>password to use against metastore database</description>-->
+<!--</property>-->
+
+<property>
+  <name>datanucleus.validateTables</name>
+  <value>false</value>
+  <description>validates existing schema against code. turn this on if you want to verify existing schema </description>
+</property>
+
+<property>
+  <name>datanucleus.validateColumns</name>
+  <value>false</value>
+  <description>validates existing schema against code. turn this on if you want to verify existing schema </description>
+</property>
+
+<property>
+  <name>datanucleus.validateConstraints</name>
+  <value>false</value>
+  <description>validates existing schema against code. turn this on if you want to verify existing schema </description>
+</property>
+
+<property>
+  <name>datanucleus.storeManagerType</name>
+  <value>rdbms</value>
+  <description>metadata store type</description>
+</property>
+
+<property>
+  <name>datanucleus.autoCreateSchema</name>
+  <value>true</value>
+  <description>creates necessary schema on startup if one doesn't exist. Set this to false after creating it once</description>
+</property>
+
+<property>
+  <name>datanucleus.autoStartMechanismMode</name>
+  <value>checked</value>
+  <description>throw exception if metadata tables are incorrect</description>
+</property>
+
+<property>
+  <name>datancucleus.transactionIsolation</name>
+  <value>read-committed</value>
+  <description></description>
+</property>
+
+<property>
+  <name>datanuclues.cache.level2</name>
+  <value>true</value>
+  <description>use a level 2 cache. turn this off if metadata is changed independently of hive metastore server</description>
+</property>
+
+<property>
+  <name>datanuclues.cache.level2.type</name>
+  <value>SOFT</value>
+  <description>SOFT=soft reference based cache, WEAK=weak reference based cache.</description>
+</property>
+
+<property>
+  <name>hive.metastore.warehouse.dir</name>
+  <value>/user/hive/warehouse</value>
+  <description>location of default database for the warehouse</description>
+</property>
+
+<property>
+  <name>hive.metastore.connect.retries</name>
+  <value>5</value>
+  <description>Number of retries while opening a connection to metastore</description>
+</property>
+
+<property>
+  <name>hive.metastore.rawstore.impl</name>
+  <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+  <description>Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database</description>
+</property>
+
+<property>
+  <name>hive.default.fileformat</name>
+  <value>TextFile</value>
+  <description>Default file format for CREATE TABLE statement. Options are TextFile and SequenceFile. Users can explicitly say CREATE TABLE ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
+</property>
+
+<property>
+  <name>hive.fileformat.check</name>
+  <value>true</value>
+  <description>Whether to check file format or not when loading data files</description>
+</property>
+
+<property>
+  <name>hive.map.aggr</name>
+  <value>true</value>
+  <description>Whether to use map-side aggregation in Hive Group By queries</description>
+</property>
+
+<property>
+  <name>hive.groupby.skewindata</name>
+  <value>false</value>
+  <description>Whether there is skew in data to optimize group by queries</description>
+</property>
+
+<property>
+  <name>hive.groupby.mapaggr.checkinterval</name>
+  <value>100000</value>
+  <description>Number of rows after which size of the grouping keys/aggregation classes is performed</description>
+</property>
+
+<property>
+  <name>hive.mapred.local.mem</name>
+  <value>0</value>
+  <description>For local mode, memory of the mappers/reducers</description>
+</property>
+
+<property>
+  <name>hive.map.aggr.hash.percentmemory</name>
+  <value>0.5</value>
+  <description>Portion of total memory to be used by map-side group aggregation hash table</description>
+</property>
+
+<property>
+  <name>hive.map.aggr.hash.min.reduction</name>
+  <value>0.5</value>
+  <description>Hash aggregation will be turned off if the ratio between hash
+  table size and input rows is bigger than this number. Set to 1 to make sure
+  hash aggregation is never turned off.</description>
+</property>
+
+<property>
+  <name>hive.optimize.cp</name>
+  <value>true</value>
+  <description>Whether to enable column pruner</description>
+</property>
+
+<property>
+  <name>hive.optimize.ppd</name>
+  <value>true</value>
+  <description>Whether to enable predicate pushdown</description>
+</property>
+
+<property>
+  <name>hive.optimize.pruner</name>
+  <value>true</value>
+  <description>Whether to enable the new partition pruner which depends on predicate pushdown. If this is disabled,
+  the old partition pruner which is based on AST will be enabled.</description>
+</property>
+
+<property>
+  <name>hive.optimize.groupby</name>
+  <value>true</value>
+  <description>Whether to enable the bucketed group by from bucketed partitions/tables.</description>
+</property>
+
+<property>
+  <name>hive.join.emit.interval</name>
+  <value>1000</value>
+  <description>How many rows in the right-most join operand Hive should buffer before emitting the join result. </description>
+</property>
+
+<property>
+  <name>hive.join.cache.size</name>
+  <value>25000</value>
+  <description>How many rows in the joining tables (except the streaming table) should be cached in memory. </description>
+</property>
+
+<property>
+  <name>hive.mapjoin.bucket.cache.size</name>
+  <value>100</value>
+  <description>How many values for each key in the map-joined table should be cached in memory. </description>
+</property>
+
+<property>
+  <name>hive.mapjoin.maxsize</name>
+  <value>100000</value>
+  <description>Maximum # of rows of the small table that can be handled by map-side join. If the size is reached and hive.task.progress is set, a fatal error counter is set and the job will be killed.</description>
+</property>
+
+<property>
+  <name>hive.mapjoin.cache.numrows</name>
+  <value>25000</value>
+  <description>How many rows should be cached by jdbm for map join. </description>
+</property>
+
+<property>
+  <name>hive.mapred.mode</name>
+  <value>nonstrict</value>
+  <description>The mode in which the hive operations are being performed. In strict mode, some risky queries are not allowed to run</description>
+</property>
+
+<property>
+  <name>hive.exec.script.maxerrsize</name>
+  <value>100000</value>
+  <description>Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). This prevents runaway scripts from filling logs partitions to capacity </description>
+</property>
+
+<property>
+  <name>hive.exec.script.allow.partial.consumption</name>
+  <value>false</value>
+  <description> When enabled, this option allows a user script to exit successfully without consuming all the data from the standard input.
+  </description>
+</property>
+
+<property>
+  <name>hive.script.operator.id.env.var</name>
+  <value>HIVE_SCRIPT_OPERATOR_ID</value>
+  <description> Name of the environment variable that holds the unique script operator ID in the user's transform function (the custom mapper/reducer that the user has specified in the query)
+  </description>
+</property>
+
+<property>
+  <name>hive.exec.compress.output</name>
+  <value>false</value>
+  <description> This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* </description>
+</property>
+
+<property>
+  <name>hive.exec.compress.intermediate</name>
+  <value>false</value>
+  <description> This controls whether intermediate files produced by hive between multiple map-reduce jobs are compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* </description>
+</property>
+
+<property>
+  <name>hive.exec.parallel</name>
+  <value>false</value>
+  <description>Whether to execute jobs in parallel</description>
+</property>
+
+<property>
+  <name>hive.hwi.war.file</name>
+  <value>lib/hive-hwi-0.5.0+20.war</value>
+  <description>This sets the path to the HWI war file, relative to ${HIVE_HOME}. </description>
+</property>
+
+<property>
+  <name>hive.hwi.listen.host</name>
+  <value>0.0.0.0</value>
+  <description>This is the host address the Hive Web Interface will listen on</description>
+</property>
+
+<property>
+  <name>hive.hwi.listen.port</name>
+  <value>9999</value>
+  <description>This is the port the Hive Web Interface will listen on</description>
+</property>
+
+<property>
+  <name>hive.exec.pre.hooks</name>
+  <value></value>
+  <description>Pre Execute Hook for Tests</description>
+</property>
+
+<property>
+  <name>hive.merge.mapfiles</name>
+  <value>true</value>
+  <description>Merge small files at the end of a map-only job</description>
+</property>
+
+<property>
+  <name>hive.merge.mapredfiles</name>
+  <value>false</value>
+  <description>Merge small files at the end of any job(map only or map-reduce)</description>
+</property>
+
+<property>
+  <name>hive.heartbeat.interval</name>
+  <value>1000</value>
+  <description>Send a heartbeat after this interval - used by mapjoin and filter operators</description>
+</property>
+
+<property>
+  <name>hive.merge.size.per.task</name>
+  <value>256000000</value>
+  <description>Size of merged files at the end of the job</description>
+</property>
+
+<property>
+  <name>hive.script.auto.progress</name>
+  <value>false</value>
+  <description>Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker to avoid the task getting killed because of inactivity.  Hive sends progress information when the script is outputting to stderr.  This option removes the need of periodically producing stderr messages, but users should be cautious because this may prevent infinite loops in the scripts from being killed by TaskTracker.  </description>
+</property>
+
+<property>
+  <name>hive.script.serde</name>
+  <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+  <description>The default serde for transmitting input data to and reading output data from the user scripts. </description>
+</property>
+
+<property>
+  <name>hive.script.recordreader</name>
+  <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+  <description>The default record reader for reading data from the user scripts. </description>
+</property>
+
+<property>
+  <name>hive.script.recordwriter</name>
+  <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+  <description>The default record writer for writing data to the user scripts. </description>
+</property>
+
+<property>
+  <name>hive.input.format</name>
+  <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+  <description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombinedHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombinedHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
+</property>
+
+<property>
+  <name>hive.udtf.auto.progress</name>
+  <value>false</value>
+  <description>Whether Hive should automatically send progress information to TaskTracker when using UDTFs to prevent the task getting killed because of inactivity.  Users should be cautious because this may prevent TaskTracker from killing tasks with infinite loops.  </description>
+</property>
+
+<property>
+  <name>hive.mapred.reduce.tasks.speculative.execution</name>
+  <value>true</value>
+  <description>Whether speculative execution for reducers should be turned on. </description>
+</property>
+
+</configuration>

Added: incubator/oozie/trunk/examples/src/main/apps/hive/script.q
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/examples/src/main/apps/hive/script.q?rev=1208126&view=auto
==============================================================================
--- incubator/oozie/trunk/examples/src/main/apps/hive/script.q (added)
+++ incubator/oozie/trunk/examples/src/main/apps/hive/script.q Tue Nov 29 23:11:37 2011
@@ -0,0 +1,2 @@
+CREATE EXTERNAL TABLE test (a INT) STORED AS TEXTFILE LOCATION '${INPUT}';
+INSERT OVERWRITE DIRECTORY '${OUTPUT}' SELECT * FROM test;

Added: incubator/oozie/trunk/examples/src/main/apps/hive/workflow.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/examples/src/main/apps/hive/workflow.xml?rev=1208126&view=auto
==============================================================================
--- incubator/oozie/trunk/examples/src/main/apps/hive/workflow.xml (added)
+++ incubator/oozie/trunk/examples/src/main/apps/hive/workflow.xml Tue Nov 29 23:11:37 2011
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf">
+    <start to="hive-node"/>
+
+    <action name="hive-node">
+        <hive xmlns="uri:oozie:hive-action:0.2">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <prepare>
+                <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive"/>
+                <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
+            </prepare>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+                <property>
+                    <name>oozie.hive.defaults</name>
+                    <value>my-hive-default.xml</value>
+                </property>
+            </configuration>
+            <script>script.q</script>
+            <param>INPUT=/user/${wf:user()}/${examplesRoot}/input-data/table</param>
+            <param>OUTPUT=/user/${wf:user()}/${examplesRoot}/output-data/hive</param>
+        </hive>
+        <ok to="end"/>
+        <error to="fail"/>
+    </action>
+
+    <kill name="fail">
+        <message>Hive failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <end name="end"/>
+</workflow-app>

Modified: incubator/oozie/trunk/examples/src/test/java/org/apache/oozie/example/TestLocalOozieExample.java
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/examples/src/test/java/org/apache/oozie/example/TestLocalOozieExample.java?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/examples/src/test/java/org/apache/oozie/example/TestLocalOozieExample.java (original)
+++ incubator/oozie/trunk/examples/src/test/java/org/apache/oozie/example/TestLocalOozieExample.java Tue Nov 29 23:11:37 2011
@@ -55,7 +55,7 @@ public class TestLocalOozieExample exten
         conf.set("fs.default.name", getNameNodeUri());
         injectKerberosInfo(conf);
 
-// TODO restore this when getting rid of DoAs trick
+        // TODO restore this when getting rid of DoAs trick
 
 //        if (System.getProperty("oozie.test.kerberos", "off").equals("on")) {
 //            Configuration c = new Configuration();

Modified: incubator/oozie/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/pom.xml?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/pom.xml (original)
+++ incubator/oozie/trunk/pom.xml Tue Nov 29 23:11:37 2011
@@ -56,6 +56,7 @@
         <test.exclude.pattern>_</test.exclude.pattern>
 
         <oozie.test.dir>${project.build.directory}/test-data</oozie.test.dir>
+        <oozie.test.forkMode>once</oozie.test.forkMode>
 
         <maven.test.redirectTestOutputToFile>true</maven.test.redirectTestOutputToFile>
 
@@ -116,6 +117,22 @@
                 <enabled>true</enabled>
             </snapshots>
         </repository>
+        <repository>
+            <id>apache.snapshots.repo</id>
+            <url>https://repository.apache.org/content/groups/snapshots</url>
+            <name>Apache Snapshots Repository</name>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+        <repository>
+            <id>datanucleus</id>
+            <url>http://www.datanucleus.org/downloads/maven2</url>
+            <name>Datanucleus</name>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
     </repositories>
 
     <pluginRepositories>
@@ -177,6 +194,11 @@
             </dependency>
             <dependency>
                 <groupId>org.apache.oozie</groupId>
+                <artifactId>oozie-sharelib-hive</artifactId>
+                <version>3.2.0-SNAPSHOT</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.oozie</groupId>
                 <artifactId>oozie-docs</artifactId>
                 <version>3.2.0-SNAPSHOT</version>
                 <type>war</type>
@@ -227,6 +249,12 @@
             </dependency>
 
             <dependency>
+                <groupId>commons-lang</groupId>
+                <artifactId>commons-lang</artifactId>
+                <version>2.4</version>
+            </dependency>
+
+            <dependency>
                 <groupId>org.apache.openjpa</groupId>
                 <artifactId>openjpa-jdbc</artifactId>
                 <version>2.1.0</version>
@@ -327,6 +355,35 @@
             </dependency>
 
             <dependency>
+                <groupId>org.apache.thrift</groupId>
+                <artifactId>thrift</artifactId>
+                <version>0.5.0-cdh</version>
+            </dependency>
+
+            <dependency>
+                <groupId>org.apache.hive</groupId>
+                <artifactId>hive-cli</artifactId>
+                <version>0.9.0-SNAPSHOT</version>
+                <exclusions>
+                    <exclusion>
+                        <groupId>hadoop</groupId>
+                        <artifactId>core</artifactId>
+                    </exclusion>
+                </exclusions>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hive</groupId>
+                <artifactId>hive-contrib</artifactId>
+                <version>0.9.0-SNAPSHOT</version>
+                <exclusions>
+                    <exclusion>
+                        <groupId>hadoop</groupId>
+                        <artifactId>core</artifactId>
+                    </exclusion>
+                </exclusions>
+            </dependency>
+
+            <dependency>
                 <groupId>org.slf4j</groupId>
                 <artifactId>slf4j-log4j12</artifactId>
                 <version>1.4.3</version>
@@ -619,16 +676,22 @@
                 <artifactId>maven-surefire-plugin</artifactId>
                 <configuration>
                     <forkMode>${oozie.test.forkMode}</forkMode>
-                    <argLine>-Xmx1024m</argLine>
-                    <systemPropertiesVariables>
+                    <argLine>-Xmx1024m -da</argLine>
+                    <systemPropertyVariables>
                         <hadoop.log.dir>/tmp</hadoop.log.dir>
-                        <oozie.test.data.dir>${oozie.test.data.dir}</oozie.test.data.dir>
                         <oozie.test.db.host>${oozie.test.db.host}</oozie.test.db.host>
                         <oozie.test.config.file>${oozie.test.config.file}</oozie.test.config.file>
-                    </systemPropertiesVariables>
+                        <oozie.data.dir>${oozie.data.dir}</oozie.data.dir>
+                    </systemPropertyVariables>
+                    <environmentVariables>
+                      <HADOOP_HOME>dummy</HADOOP_HOME>
+                    </environmentVariables>
                     <excludes>
                         <exclude>**/${test.exclude}.java</exclude>
                         <exclude>${test.exclude.pattern}</exclude>
+
+                        <!-- See 'testHive' profile in core/pom.xml -->
+                        <exclude>**/TestHive*.java</exclude>
                     </excludes>
                 </configuration>
             </plugin>

Modified: incubator/oozie/trunk/release-log.txt
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/release-log.txt?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/release-log.txt (original)
+++ incubator/oozie/trunk/release-log.txt Tue Nov 29 23:11:37 2011
@@ -1,5 +1,6 @@
 -- Oozie 3.2.0 release
 
+OOZIE-68 Add Hive action. (tucu)
 OOZIE-608 Fix test failure for testCoordChangeXCommand, testCoordChangeEndTime Unit
 OOZIE-610 Oozie system share lib should have jars per action type. (tucu)
 OOZIE-565 Make Oozie compile against Hadoop 0.23. (tucu)

Added: incubator/oozie/trunk/sharelib/hive/pom.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/sharelib/hive/pom.xml?rev=1208126&view=auto
==============================================================================
--- incubator/oozie/trunk/sharelib/hive/pom.xml (added)
+++ incubator/oozie/trunk/sharelib/hive/pom.xml Tue Nov 29 23:11:37 2011
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.oozie</groupId>
+        <artifactId>oozie-main</artifactId>
+        <version>3.2.0-SNAPSHOT</version>
+        <relativePath>../..</relativePath>
+    </parent>
+    <artifactId>oozie-sharelib-hive</artifactId>
+    <description>Oozie Share Lib Hive</description>
+    <name>Oozie Share Lib Hive</name>
+    <packaging>jar</packaging>
+
+    <properties>
+        <sharelib.action.postfix>hive</sharelib.action.postfix>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-cli</artifactId>
+            <scope>compile</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.hive</groupId>
+                    <artifactId>hive-hwi</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hive</groupId>
+                    <artifactId>hive-jdbc</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hive</groupId>
+                    <artifactId>hive-anttasks</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-logging</groupId>
+                    <artifactId>commons-logging</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-logging</groupId>
+                    <artifactId>commons-logging-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.mockito</groupId>
+                    <artifactId>mockito-all</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>javax.servlet</groupId>
+                    <artifactId>servlet-api</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-contrib</artifactId>
+            <scope>runtime</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr-runtime</artifactId>
+            <version>3.0.1</version>
+            <scope>compile</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-deploy-plugin</artifactId>
+                <configuration>
+                    <skip>true</skip>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <finalName>partial-sharelib</finalName>
+                    <appendAssemblyId>false</appendAssemblyId>
+                    <descriptors>
+                        <descriptor>../../src/main/assemblies/partial-sharelib.xml</descriptor>
+                    </descriptors>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
+

Modified: incubator/oozie/trunk/sharelib/pom.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/sharelib/pom.xml?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/sharelib/pom.xml (original)
+++ incubator/oozie/trunk/sharelib/pom.xml Tue Nov 29 23:11:37 2011
@@ -31,6 +31,7 @@
     <modules>
         <module>streaming</module>
         <module>pig</module>
+        <module>hive</module>
     </modules>
 
     <build>

Modified: incubator/oozie/trunk/src/main/assemblies/sharelib.xml
URL: http://svn.apache.org/viewvc/incubator/oozie/trunk/src/main/assemblies/sharelib.xml?rev=1208126&r1=1208125&r2=1208126&view=diff
==============================================================================
--- incubator/oozie/trunk/src/main/assemblies/sharelib.xml (original)
+++ incubator/oozie/trunk/src/main/assemblies/sharelib.xml Tue Nov 29 23:11:37 2011
@@ -39,6 +39,10 @@
             <directory>${basedir}/streaming/target/partial-sharelib</directory>
             <outputDirectory>/</outputDirectory>
         </fileSet>
+        <fileSet>
+            <directory>${basedir}/hive/target/partial-sharelib</directory>
+            <outputDirectory>/</outputDirectory>
+        </fileSet>
     </fileSets>
 
 </assembly>