You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@oozie.apache.org by Morgrim Murdargent <mo...@gmail.com> on 2016/03/29 16:41:09 UTC
Hive action in oozie using tez engine
Hello !
I have a very simple workflow that switches to a database and creates a table in it.
Here is my property file (I replaced the real values for security reasons) :
###
#
------------------------------------------------------------------------------
# Environment
#
------------------------------------------------------------------------------
nameNode=hdfs://<NAMENODE>:8020
jobTracker=<RESOURCE MANAGER>:8050
kerberosRealm=<KERBEROS REALM>
queueName=<myqueue>
hiveMetastoreHost=<HIVEMETASTORE_HOST>
#
------------------------------------------------------------------------------
# Application
#
------------------------------------------------------------------------------
appRoot=${nameNode}/tmp/test
oozie.wf.application.path=${appRoot}/hive.xml
#
------------------------------------------------------------------------------
# Oozie
#
------------------------------------------------------------------------------
oozie.use.system.libpath=true
oozie.wf.rerun.failnodes=true
#
------------------------------------------------------------------------------
# Kerberos
#
------------------------------------------------------------------------------
hcatMetastoreUri=thrift://${hiveMetastoreHost}:9083
hcatMetastorePrincipal=hive/_HOST@${kerberosRealm}
hiveConfPath=${appRoot}/hive-site.xml
#
------------------------------------------------------------------------------
# Tez
#
------------------------------------------------------------------------------
tezLibPath=${nameNode}/hdp/apps/2.3.2.0-2950/tez/
###
Here is the xml file for the workflow :
###
<workflow-app xmlns="uri:oozie:workflow:0.5" name="TEST_HIVE_ACTION">
<credentials>
<credential name="hive_credentials" type="hcat">
<property>
<name>hcat.metastore.uri</name>
<value>${hcatMetastoreUri}</value>
</property>
<property>
<name>hcat.metastore.principal</name>
<value>${hcatMetastorePrincipal}</value>
</property>
</credential>
</credentials>
<start to="init-hive" />
<action name="init-hive" cred="hive_credentials">
<hive xmlns="uri:oozie:hive-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<job-xml>${hiveConfPath}</job-xml>
<configuration>
<property>
<name>oozie.hive.defaults</name>
<value>${hiveConfPath}</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>tez</value>
</property>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>tez.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>tez.lib.uris</name>
<value>${tezLibPath}</value>
</property>
</configuration>
<script>hive.hql</script>
<file>hive.hql#hive.hql</file>
</hive>
<ok to="end"/>
<error to="fail"/>
</action>
<kill name="fail">
<message>Script failed, error
message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<end name="end"/>
</workflow-app>
###
Here is the hql file (very simple, it is just for the test ;-)) :
###
use mabdd;
create table if not exists test_tez_via_oozie (test string);
###
Here is the hive-site.xml
###
<configuration>
<property>
<name>ambari.hive.db.schema.name</name>
<value>hive</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>false</value>
</property>
<property>
<name>datanucleus.cache.level2.type</name>
<value>none</value>
</property>
<property>
<name>fs.file.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>999999668</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
<value>false</value>
</property>
<property>
<name>hive.cbo.enable</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.header</name>
<value>false</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
<value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
<value>/hive/cluster/delegation</value>
</property>
<property>
<name>hive.compactor.abortedtxn.threshold</name>
<value>1000</value>
</property>
<property>
<name>hive.compactor.check.interval</name>
<value>300L</value>
</property>
<property>
<name>hive.compactor.delta.num.threshold</name>
<value>10</value>
</property>
<property>
<name>hive.compactor.delta.pct.threshold</name>
<value>0.1f</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>false</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>0</value>
</property>
<property>
<name>hive.compactor.worker.timeout</name>
<value>86400L</value>
</property>
<property>
<name>hive.compute.query.using.stats</name>
<value>true</value>
</property>
<property>
<name>hive.conf.restricted.list</name>
<value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value>
</property>
<property>
<name>hive.convert.join.bucket.mapjoin.tez</name>
<value>false</value>
</property>
<property>
<name>hive.default.fileformat</name>
<value>TextFile</value>
</property>
<property>
<name>hive.default.fileformat.managed</name>
<value>TextFile</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>false</value>
</property>
<property>
<name>hive.enforce.sorting</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.exec.compress.intermediate</name>
<value>false</value>
</property>
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>strict</value>
</property>
<property>
<name>hive.exec.failure.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.max.created.files</name>
<value>100000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions</name>
<value>5000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name>
<value>2000</value>
</property>
<property>
<name>hive.exec.orc.compression.strategy</name>
<value>SPEED</value>
</property>
<property>
<name>hive.exec.orc.default.compress</name>
<value>ZLIB</value>
</property>
<property>
<name>hive.exec.orc.default.stripe.size</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.orc.encoding.strategy</name>
<value>SPEED</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>false</value>
</property>
<property>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.reducers.max</name>
<value>1009</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive</value>
</property>
<property>
<name>hive.exec.submit.local.task.via.child</name>
<value>true</value>
</property>
<property>
<name>hive.exec.submitviachild</name>
<value>false</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
</property>
<property>
<name>hive.fetch.task.aggr</name>
<value>false</value>
</property>
<property>
<name>hive.fetch.task.conversion</name>
<value>more</value>
</property>
<property>
<name>hive.fetch.task.conversion.threshold</name>
<value>1073741824</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>hive.limit.pushdown.memory.usage</name>
<value>0.04</value>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
</property>
<property>
<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
<value>0.9</value>
</property>
<property>
<name>hive.map.aggr.hash.min.reduction</name>
<value>0.5</value>
</property>
<property>
<name>hive.map.aggr.hash.percentmemory</name>
<value>0.5</value>
</property>
<property>
<name>hive.mapjoin.bucket.cache.size</name>
<value>10000</value>
</property>
<property>
<name>hive.mapjoin.optimized.hashtable</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>hive.merge.mapfiles</name>
<value>true</value>
</property>
<property>
<name>hive.merge.mapredfiles</name>
<value>false</value>
</property>
<property>
<name>hive.merge.orcfile.stripe.level</name>
<value>true</value>
</property>
<property>
<name>hive.merge.rcfile.block.level</name>
<value>true</value>
</property>
<property>
<name>hive.merge.size.per.task</name>
<value>256000000</value>
</property>
<property>
<name>hive.merge.smallfiles.avgsize</name>
<value>16000000</value>
</property>
<property>
<name>hive.merge.tezfiles</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.authorization.storage.checks</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,Database,Type,FieldSchema,Order</value>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>5s</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>1800s</value>
</property>
<property>
<name>hive.metastore.connect.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.failure.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value>/etc/security/keytabs/hive.service.keytab</value>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive/_HOST@<KRB_REALM></value>
</property>
<property>
<name>hive.metastore.pre.event.listeners</name>
<value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.server.max.threads</name>
<value>100000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://<db_srv>:9083</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/apps/hive/warehouse</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin.sortedmerge</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.constant.propagation</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.index.filter</name>
<value>false</value>
</property>
<property>
<name>hive.optimize.metadataonly</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.null.scan</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>1</value>
</property>
<property>
<name>hive.optimize.sort.dynamic.partition</name>
<value>false</value>
</property>
<property>
<name>hive.orc.compute.splits.num.threads</name>
<value>10</value>
</property>
<property>
<name>hive.orc.splits.include.file.footer</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.numcontainers</name>
<value>3</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
</property>
<property>
<name>hive.security.metastore.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
</property>
<property>
<name>hive.security.metastore.authorization.auth.reads</name>
<value>true</value>
</property>
<property>
<name>hive.security.metastore.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
</property>
<property>
<name>hive.server2.allow.user.substitution</name>
<value>true</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>KERBEROS</value>
</property>
<property>
<name>hive.server2.authentication.kerberos.keytab</name>
<value>/etc/security/keytabs/hive.service.keytab</value>
</property>
<property>
<name>hive.server2.authentication.kerberos.principal</name>
<value>hive/_HOST@<KRB_REALM></value>
</property>
<property>
<name>hive.server2.authentication.spnego.keytab</name>
<value>/etc/security/keytabs/spnego.service.keytab</value>
</property>
<property>
<name>hive.server2.authentication.spnego.principal</name>
<value>HTTP/_HOST@<KRB_REALM></value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>hive.server2.keystore.password</name>
<value>hive_server2</value>
</property>
<property>
<name>hive.server2.keystore.path</name>
<value>/etc/hive/keystores/hive2.jks</value>
</property>
<property>
<name>hive.server2.logging.operation.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>
<property>
<name>hive.server2.table.type.mapping</name>
<value>CLASSIC</value>
</property>
<property>
<name>hive.server2.tez.default.queues</name>
<value>default</value>
</property>
<property>
<name>hive.server2.tez.initialize.default.sessions</name>
<value>false</value>
</property>
<property>
<name>hive.server2.tez.sessions.per.default.queue</name>
<value>1</value>
</property>
<property>
<name>hive.server2.thrift.http.path</name>
<value>cliservice</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>10001</value>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>500</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.sasl.qop</name>
<value>auth</value>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
</property>
<property>
<name>hive.server2.use.SSL</name>
<value>false</value>
</property>
<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2</value>
</property>
<property>
<name>hive.smbjoin.cache.rows</name>
<value>10000</value>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
</property>
<property>
<name>hive.stats.dbclass</name>
<value>fs</value>
</property>
<property>
<name>hive.stats.fetch.column.stats</name>
<value>true</value>
</property>
<property>
<name>hive.stats.fetch.partition.stats</name>
<value>true</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>false</value>
</property>
<property>
<name>hive.tez.auto.reducer.parallelism</name>
<value>true</value>
</property>
<property>
<name>hive.tez.container.size</name>
<value>2048</value>
</property>
<property>
<name>hive.tez.cpu.vcores</name>
<value>-1</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning</name>
<value>true</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.data.size</name>
<value>104857600</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.event.size</name>
<value>1048576</value>
</property>
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
<property>
<name>hive.tez.java.opts</name>
<value>-server -Xmx1639m -Djava.net.preferIPv4Stack=true
-XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails
-verbose:gc -XX:+PrintGCTimeStamps</value>
</property>
<property>
<name>hive.tez.log.level</name>
<value>INFO</value>
</property>
<property>
<name>hive.tez.max.partition.factor</name>
<value>2.0</value>
</property>
<property>
<name>hive.tez.min.partition.factor</name>
<value>0.25</value>
</property>
<property>
<name>hive.tez.smb.number.waves</name>
<value>0.5</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
</property>
<property>
<name>hive.txn.max.open.batch</name>
<value>1000</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>300</value>
</property>
<property>
<name>hive.user.install.directory</name>
<value>/tmp/</value>
</property>
<property>
<name>hive.vectorized.execution.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.execution.reduce.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.groupby.checkinterval</name>
<value>1024</value>
</property>
<property>
<name>hive.vectorized.groupby.flush.percent</name>
<value>0.1</value>
</property>
<property>
<name>hive.vectorized.groupby.maxentries</name>
<value>100000</value>
</property>
<property>
<name>hive.warehouse.subdir.inherit.perms</name>
<value>true</value>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>
<property>
<name>hive.zookeeper.namespace</name>
<value>hive_zookeeper_namespace</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.postgresql.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:postgresql://<db_srv>:<port>/hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
</configuration>
###
When I run the workflow, I get the following error message :
###
# oozie job -oozie=http://<OOZIE_SRV>:<OOZIE_PORT>/oozie -info
0004487-160314163828133-oozie-oozi-W -verbose
Job ID : 0004487-160314163828133-oozie-oozi-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : TEST_HIVE_ACTION
App Path : hdfs://<NAMENODE_SRV>:8020/tmp/test/hive.xml
Status : KILLED
Run : 0
User : <myuser>
Group : -
Created : 2016-03-29 13:37:46 GMT
Started : 2016-03-29 13:37:46 GMT
Last Modified : 2016-03-29 13:38:11 GMT
Ended : 2016-03-29 13:38:11 GMT
CoordAction ID: -
Actions
------------------------------------------------------------------------------------------------------------------------------------
ID Console URL Error Code Error Message External ID
External Status Name RetriesTracker URI Type Started Status Ended
------------------------------------------------------------------------------------------------------------------------------------
0004487-160314163828133-oozie-oozi-W@:start: - - -
- OK :start: 0 -:START: 2016-03-29 13:37:46 GMT OK
2016-03-29 13:37:46 GMT
------------------------------------------------------------------------------------------------------------------------------------
0004487-160314163828133-oozie-oozi-W@init-hive
http://<NAMENODE_SRV>:8088/proxy/application_1457363931356_28038/
JA018 org.apache.tez.dag.api.SessionNotRunning: TezSession has already
shutdown. Application application_1457363931356_28039 failed 2 times due to
AM Container for appattempt_1457363931356_28039_000002 exited with
exitCode: 1
For more detailed output, check application tracking
page:http://<NAMENODE_SRV>:8088/cluster/app/application_1457363931356_28039Then,
click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_e19_1457363931356 job_1457363931356_28038
FAILED/KILLED init-hive 0<NAMENODE_SRV>:8050 hive 2016-03-29
13:37:46 GMT ERROR 2016-03-29 13:38:11 GMT
------------------------------------------------------------------------------------------------------------------------------------
0004487-160314163828133-oozie-oozi-W@fail - E0729 Script
failed, error message[org.apache.tez.dag.api.SessionNotRunning: TezSession
has already shutdown. Application application_1457363931356_28039 failed 2
times due to AM Container for appattempt_1457363931356_28039_000002 exited
with exitCode: 1
For more detailed output, check application tracking
page:http://<NAMENODE_SRV>:<RM_PORT>/cluster/app/application_1457363931356_28039Then,
click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id - OK fail 0 - :KILL: 2016-03-29
13:38:11 GMT OK 2016-03-29 13:38:11 GMT
------------------------------------------------------------------------------------------------------------------------------------
###
And when I checked the application attempt, here is what I found:
###
Container: container_e19_1457363931356_28039_01_000001 on <DATANODE>_45454
==================================================================================================
LogType:stderr
Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
LogLength:77
Log Contents:
Error: Could not find or load main class org.apache.tez.dag.app.DAGAppMaster
End of LogType:stderr
LogType:stdout
Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
LogLength:949
Log Contents:
Heap
PSYoungGen total 602112K, used 10322K [0x00000007d5500000,
0x00000007ff500000, 0x0000000800000000)
eden space 516096K, 2% used
[0x00000007d5500000,0x00000007d5f14898,0x00000007f4d00000)
lgrp 0 space 258048K, 4% used
[0x00000007d5500000,0x00000007d5f14898,0x00000007e5100000)
lgrp 1 space 258048K, 0% used
[0x00000007e5100000,0x00000007e5100000,0x00000007f4d00000)
from space 86016K, 0% used
[0x00000007fa100000,0x00000007fa100000,0x00000007ff500000)
to space 86016K, 0% used
[0x00000007f4d00000,0x00000007f4d00000,0x00000007fa100000)
ParOldGen total 1375232K, used 0K [0x0000000780000000,
0x00000007d3f00000, 0x00000007d5500000)
object space 1375232K, 0% used
[0x0000000780000000,0x0000000780000000,0x00000007d3f00000)
PSPermGen total 21504K, used 2874K [0x0000000775a00000,
0x0000000776f00000, 0x0000000780000000)
object space 21504K, 13% used
[0x0000000775a00000,0x0000000775cce858,0x0000000776f00000)
End of LogType:stdout
###
So the following error is what blocks my workflow:
###
Could not find or load main class org.apache.tez.dag.app.DAGAppMaster
###
I don't understand why I get this error message, because I set
tez.lib.uris in my workflow.xml.
The Tez lib path contains a tar.gz file with all the necessary jars,
including the tez-dag jar that contains this class.
So I am writing to ask how to use Tez as the execution engine in
an Oozie workflow. Could someone help me, please?
Best regards.
Morgrim.
Re: Hive action in oozie using tez engine
Posted by Morgrim Murdargent <mo...@gmail.com>.
Hello everyone.
I think the problem was coming from my variable tezLibPath.
tezLibPath=${nameNode}/hdp/apps/2.3.2.0-2950/tez/
I had given only the directory, whereas what is expected here is the directory plus the archive file name.
When I retried with the following path, it worked :
tezLibPath=${nameNode}/hdp/apps/2.3.2.0-2950/tez/tez.tar.gz
Hope it will help ^_^.
Best regards.
Morgrim.
On Tue, Mar 29, 2016 at 4:44 PM, Morgrim Murdargent <morgrim.oozie@gmail.com
> wrote:
> As a note, I don't have this error when oozie is out of the equation.
>
> Best regards.
>
> Hope you can help me.
>
> Morgrim.
>
> On Tue, Mar 29, 2016 at 4:41 PM, Morgrim Murdargent <
> morgrim.oozie@gmail.com> wrote:
>
>> Hello !
>>
>> I have a very simple workflow, using a database to create a table.
>>
>> Here is my property file (I replaced the real values for security
>> reasons) :
>> ###
>> #
>> ------------------------------------------------------------------------------
>> # Environment
>> #
>> ------------------------------------------------------------------------------
>> nameNode=hdfs://<NAMENODE>:8020
>> jobTracker=<RESOURCE MANAGER>:8050
>> kerberosRealm=<KERBEROS REALM>
>> queueName=<myqueue>
>> hiveMetastoreHost=<HIVEMETASTORE_HOST>
>>
>> #
>> ------------------------------------------------------------------------------
>> # Application
>> #
>> ------------------------------------------------------------------------------
>> appRoot=${nameNode}/tmp/test
>> oozie.wf.application.path=${appRoot}/hive.xml
>>
>> #
>> ------------------------------------------------------------------------------
>> # Oozie
>> #
>> ------------------------------------------------------------------------------
>> oozie.use.system.libpath=true
>> oozie.wf.rerun.failnodes=true
>>
>> #
>> ------------------------------------------------------------------------------
>> # Kerberos
>> #
>> ------------------------------------------------------------------------------
>> hcatMetastoreUri=thrift://${hiveMetastoreHost}:9083
>> hcatMetastorePrincipal=hive/_HOST@${kerberosRealm}
>> hiveConfPath=${appRoot}/hive-site.xml
>>
>> #
>> ------------------------------------------------------------------------------
>> # Tez
>> #
>> ------------------------------------------------------------------------------
>> tezLibPath=${nameNode}/hdp/apps/2.3.2.0-2950/tez/
>> ###
>>
>> Here is the xml file for the workflow :
>> ###
>> <workflow-app xmlns="uri:oozie:workflow:0.5" name="TEST_HIVE_ACTION">
>> <credentials>
>> <credential name="hive_credentials" type="hcat">
>> <property>
>> <name>hcat.metastore.uri</name>
>> <value>${hcatMetastoreUri}</value>
>> </property>
>> <property>
>> <name>hcat.metastore.principal</name>
>> <value>${hcatMetastorePrincipal}</value>
>> </property>
>> </credential>
>> </credentials>
>> <start to="init-hive" />
>> <action name="init-hive" cred="hive_credentials">
>> <hive xmlns="uri:oozie:hive-action:0.2">
>> <job-tracker>${jobTracker}</job-tracker>
>> <name-node>${nameNode}</name-node>
>> <job-xml>${hiveConfPath}</job-xml>
>> <configuration>
>> <property>
>> <name>oozie.hive.defaults</name>
>> <value>${hiveConfPath}</value>
>> </property>
>> <property>
>> <name>hive.execution.engine</name>
>> <value>tez</value>
>> </property>
>> <property>
>> <name>mapreduce.job.queuename</name>
>> <value>${queueName}</value>
>> </property>
>> <property>
>> <name>tez.queue.name</name>
>> <value>${queueName}</value>
>> </property>
>> <property>
>> <name>tez.lib.uris</name>
>> <value>${tezLibPath}</value>
>> </property>
>> </configuration>
>> <script>hive.hql</script>
>> <file>hive.hql#hive.hql</file>
>> </hive>
>> <ok to="end"/>
>> <error to="fail"/>
>> </action>
>>
>> <kill name="fail">
>> <message>Script failed, error
>> message[${wf:errorMessage(wf:lastErrorNode())}]</message>
>> </kill>
>>
>> <end name="end"/>
>> </workflow-app>
>> ###
>>
>> Here is the hql file (very simple, it is just for the test ;-)) :
>> ###
>> use mabdd;
>> create table if not exists test_tez_via_oozie (test string);
>> ###
>>
>> Here is the hive-site.xml
>> ###
>> <configuration>
>>
>> <property>
>> <name>ambari.hive.db.schema.name</name>
>> <value>hive</value>
>> </property>
>>
>> <property>
>> <name>datanucleus.autoCreateSchema</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>datanucleus.cache.level2.type</name>
>> <value>none</value>
>> </property>
>>
>> <property>
>> <name>fs.file.impl.disable.cache</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.auto.convert.join</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.auto.convert.join.noconditionaltask</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.auto.convert.join.noconditionaltask.size</name>
>> <value>999999668</value>
>> </property>
>>
>> <property>
>> <name>hive.auto.convert.sortmerge.join</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.cbo.enable</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.cli.print.header</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.cluster.delegation.token.store.class</name>
>> <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
>> </property>
>>
>> <property>
>>
>> <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
>> <value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
>> </property>
>>
>> <property>
>> <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
>> <value>/hive/cluster/delegation</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.abortedtxn.threshold</name>
>> <value>1000</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.check.interval</name>
>> <value>300L</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.delta.num.threshold</name>
>> <value>10</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.delta.pct.threshold</name>
>> <value>0.1f</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.initiator.on</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.worker.threads</name>
>> <value>0</value>
>> </property>
>>
>> <property>
>> <name>hive.compactor.worker.timeout</name>
>> <value>86400L</value>
>> </property>
>>
>> <property>
>> <name>hive.compute.query.using.stats</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.conf.restricted.list</name>
>>
>> <value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value>
>> </property>
>>
>> <property>
>> <name>hive.convert.join.bucket.mapjoin.tez</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.default.fileformat</name>
>> <value>TextFile</value>
>> </property>
>>
>> <property>
>> <name>hive.default.fileformat.managed</name>
>> <value>TextFile</value>
>> </property>
>>
>> <property>
>> <name>hive.enforce.bucketing</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.enforce.sorting</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.enforce.sortmergebucketmapjoin</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.compress.intermediate</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.compress.output</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.dynamic.partition</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.dynamic.partition.mode</name>
>> <value>strict</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.failure.hooks</name>
>> <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.max.created.files</name>
>> <value>100000</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.max.dynamic.partitions</name>
>> <value>5000</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.max.dynamic.partitions.pernode</name>
>> <value>2000</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.orc.compression.strategy</name>
>> <value>SPEED</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.orc.default.compress</name>
>> <value>ZLIB</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.orc.default.stripe.size</name>
>> <value>67108864</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.orc.encoding.strategy</name>
>> <value>SPEED</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.parallel</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.parallel.thread.number</name>
>> <value>8</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.post.hooks</name>
>> <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.pre.hooks</name>
>> <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.reducers.bytes.per.reducer</name>
>> <value>67108864</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.reducers.max</name>
>> <value>1009</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.scratchdir</name>
>> <value>/tmp/hive</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.submit.local.task.via.child</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.exec.submitviachild</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.execution.engine</name>
>> <value>mr</value>
>> </property>
>>
>> <property>
>> <name>hive.fetch.task.aggr</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.fetch.task.conversion</name>
>> <value>more</value>
>> </property>
>>
>> <property>
>> <name>hive.fetch.task.conversion.threshold</name>
>> <value>1073741824</value>
>> </property>
>>
>> <property>
>> <name>hive.limit.optimize.enable</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.limit.pushdown.memory.usage</name>
>> <value>0.04</value>
>> </property>
>>
>> <property>
>> <name>hive.map.aggr</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
>> <value>0.9</value>
>> </property>
>>
>> <property>
>> <name>hive.map.aggr.hash.min.reduction</name>
>> <value>0.5</value>
>> </property>
>>
>> <property>
>> <name>hive.map.aggr.hash.percentmemory</name>
>> <value>0.5</value>
>> </property>
>>
>> <property>
>> <name>hive.mapjoin.bucket.cache.size</name>
>> <value>10000</value>
>> </property>
>>
>> <property>
>> <name>hive.mapjoin.optimized.hashtable</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.mapred.reduce.tasks.speculative.execution</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.mapfiles</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.mapredfiles</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.orcfile.stripe.level</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.rcfile.block.level</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.size.per.task</name>
>> <value>256000000</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.smallfiles.avgsize</name>
>> <value>16000000</value>
>> </property>
>>
>> <property>
>> <name>hive.merge.tezfiles</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.authorization.storage.checks</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.cache.pinobjtypes</name>
>> <value>Table,Database,Type,FieldSchema,Order</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.client.connect.retry.delay</name>
>> <value>5s</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.client.socket.timeout</name>
>> <value>1800s</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.connect.retries</name>
>> <value>24</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.execute.setugi</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.failure.retries</name>
>> <value>24</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.kerberos.keytab.file</name>
>> <value>/etc/security/keytabs/hive.service.keytab</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.kerberos.principal</name>
>> <value>hive/_HOST@<KRB_REALM></value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.pre.event.listeners</name>
>>
>> <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.sasl.enabled</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.server.max.threads</name>
>> <value>100000</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.uris</name>
>> <value>thrift://<db_srv>:9083</value>
>> </property>
>>
>> <property>
>> <name>hive.metastore.warehouse.dir</name>
>> <value>/apps/hive/warehouse</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.bucketmapjoin</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.bucketmapjoin.sortedmerge</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.constant.propagation</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.index.filter</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.metadataonly</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.null.scan</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.reducededuplication</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.reducededuplication.min.reducer</name>
>> <value>1</value>
>> </property>
>>
>> <property>
>> <name>hive.optimize.sort.dynamic.partition</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.orc.compute.splits.num.threads</name>
>> <value>10</value>
>> </property>
>>
>> <property>
>> <name>hive.orc.splits.include.file.footer</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.prewarm.enabled</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.prewarm.numcontainers</name>
>> <value>3</value>
>> </property>
>>
>> <property>
>> <name>hive.security.authenticator.manager</name>
>>
>> <value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
>> </property>
>>
>> <property>
>> <name>hive.security.authorization.enabled</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.security.authorization.manager</name>
>>
>> <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
>> </property>
>>
>> <property>
>> <name>hive.security.metastore.authenticator.manager</name>
>>
>> <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
>> </property>
>>
>> <property>
>> <name>hive.security.metastore.authorization.auth.reads</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.security.metastore.authorization.manager</name>
>>
>> <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.allow.user.substitution</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.authentication</name>
>> <value>KERBEROS</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.authentication.kerberos.keytab</name>
>> <value>/etc/security/keytabs/hive.service.keytab</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.authentication.kerberos.principal</name>
>> <value>hive/_HOST@<KRB_REALM></value>
>> </property>
>>
>> <property>
>> <name>hive.server2.authentication.spnego.keytab</name>
>> <value>/etc/security/keytabs/spnego.service.keytab</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.authentication.spnego.principal</name>
>> <value>HTTP/_HOST@<KRB_REALM></value>
>> </property>
>>
>> <property>
>> <name>hive.server2.enable.doAs</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.keystore.password</name>
>> <value>hive_server2</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.keystore.path</name>
>> <value>/etc/hive/keystores/hive2.jks</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.logging.operation.enabled</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.logging.operation.log.location</name>
>> <value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.support.dynamic.service.discovery</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.table.type.mapping</name>
>> <value>CLASSIC</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.tez.default.queues</name>
>> <value>default</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.tez.initialize.default.sessions</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.tez.sessions.per.default.queue</name>
>> <value>1</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.thrift.http.path</name>
>> <value>cliservice</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.thrift.http.port</name>
>> <value>10001</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.thrift.max.worker.threads</name>
>> <value>500</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.thrift.port</name>
>> <value>10000</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.thrift.sasl.qop</name>
>> <value>auth</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.transport.mode</name>
>> <value>binary</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.use.SSL</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.server2.zookeeper.namespace</name>
>> <value>hiveserver2</value>
>> </property>
>>
>> <property>
>> <name>hive.smbjoin.cache.rows</name>
>> <value>10000</value>
>> </property>
>>
>> <property>
>> <name>hive.stats.autogather</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.stats.dbclass</name>
>> <value>fs</value>
>> </property>
>>
>> <property>
>> <name>hive.stats.fetch.column.stats</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.stats.fetch.partition.stats</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.support.concurrency</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.auto.reducer.parallelism</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.container.size</name>
>> <value>2048</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.cpu.vcores</name>
>> <value>-1</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.dynamic.partition.pruning</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.dynamic.partition.pruning.max.data.size</name>
>> <value>104857600</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.dynamic.partition.pruning.max.event.size</name>
>> <value>1048576</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.input.format</name>
>> <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.java.opts</name>
>> <value>-server -Xmx1639m -Djava.net.preferIPv4Stack=true
>> -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails
>> -verbose:gc -XX:+PrintGCTimeStamps</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.log.level</name>
>> <value>INFO</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.max.partition.factor</name>
>> <value>2.0</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.min.partition.factor</name>
>> <value>0.25</value>
>> </property>
>>
>> <property>
>> <name>hive.tez.smb.number.waves</name>
>> <value>0.5</value>
>> </property>
>>
>> <property>
>> <name>hive.txn.manager</name>
>> <value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
>> </property>
>>
>> <property>
>> <name>hive.txn.max.open.batch</name>
>> <value>1000</value>
>> </property>
>>
>> <property>
>> <name>hive.txn.timeout</name>
>> <value>300</value>
>> </property>
>>
>> <property>
>> <name>hive.user.install.directory</name>
>> <value>/tmp/</value>
>> </property>
>>
>> <property>
>> <name>hive.vectorized.execution.enabled</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.vectorized.execution.reduce.enabled</name>
>> <value>false</value>
>> </property>
>>
>> <property>
>> <name>hive.vectorized.groupby.checkinterval</name>
>> <value>1024</value>
>> </property>
>>
>> <property>
>> <name>hive.vectorized.groupby.flush.percent</name>
>> <value>0.1</value>
>> </property>
>>
>> <property>
>> <name>hive.vectorized.groupby.maxentries</name>
>> <value>100000</value>
>> </property>
>>
>> <property>
>> <name>hive.warehouse.subdir.inherit.perms</name>
>> <value>true</value>
>> </property>
>>
>> <property>
>> <name>hive.zookeeper.client.port</name>
>> <value>2181</value>
>> </property>
>>
>> <property>
>> <name>hive.zookeeper.namespace</name>
>> <value>hive_zookeeper_namespace</value>
>> </property>
>>
>> <property>
>> <name>hive.zookeeper.quorum</name>
>> <value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
>> </property>
>>
>> <property>
>> <name>javax.jdo.option.ConnectionDriverName</name>
>> <value>org.postgresql.Driver</value>
>> </property>
>>
>> <property>
>> <name>javax.jdo.option.ConnectionURL</name>
>> <value>jdbc:postgresql://<db_srv>:<port>/hive</value>
>> </property>
>>
>> <property>
>> <name>javax.jdo.option.ConnectionUserName</name>
>> <value>hive</value>
>> </property>
>>
>> </configuration>
>> ###
>>
>> When I run the workflow, I get the following error message :
>> ###
>> # oozie job -oozie=http://<OOZIE_SRV>:<OOZIE_PORT>/oozie -info
>> 0004487-160314163828133-oozie-oozi-W -verbose
>>
>> Job ID : 0004487-160314163828133-oozie-oozi-W
>>
>> ------------------------------------------------------------------------------------------------------------------------------------
>> Workflow Name : TEST_HIVE_ACTION
>> App Path : hdfs://<NAMENODE_SRV>:8020/tmp/test/hive.xml
>> Status : KILLED
>> Run : 0
>> User : <myuser>
>> Group : -
>> Created : 2016-03-29 13:37:46 GMT
>> Started : 2016-03-29 13:37:46 GMT
>> Last Modified : 2016-03-29 13:38:11 GMT
>> Ended : 2016-03-29 13:38:11 GMT
>> CoordAction ID: -
>>
>> Actions
>>
>> ------------------------------------------------------------------------------------------------------------------------------------
>> ID Console URL Error Code Error Message External ID
>> External Status Name RetriesTracker URI Type Started Status Ended
>>
>> ------------------------------------------------------------------------------------------------------------------------------------
>> 0004487-160314163828133-oozie-oozi-W@:start: - - -
>> - OK :start: 0 -:START: 2016-03-29 13:37:46 GMT OK
>> 2016-03-29 13:37:46 GMT
>>
>> ------------------------------------------------------------------------------------------------------------------------------------
>> 0004487-160314163828133-oozie-oozi-W@init-hive http://<NAMENODE_SRV>:8088/proxy/application_1457363931356_28038/
>> JA018 org.apache.tez.dag.api.SessionNotRunning: TezSession has already
>> shutdown. Application application_1457363931356_28039 failed 2 times due to
>> AM Container for appattempt_1457363931356_28039_000002 exited with
>> exitCode: 1
>> For more detailed output, check application tracking page:http://<NAMENODE_SRV>:8088/cluster/app/application_1457363931356_28039Then,
>> click on links to logs of each attempt.
>> Diagnostics: Exception from container-launch.
>> Container id: container_e19_1457363931356 job_1457363931356_28038
>> FAILED/KILLED init-hive 0<NAMENODE_SRV>:8050 hive 2016-03-29
>> 13:37:46 GMT ERROR 2016-03-29 13:38:11 GMT
>>
>> ------------------------------------------------------------------------------------------------------------------------------------
>> 0004487-160314163828133-oozie-oozi-W@fail - E0729 Script
>> failed, error message[org.apache.tez.dag.api.SessionNotRunning: TezSession
>> has already shutdown. Application application_1457363931356_28039 failed 2
>> times due to AM Container for appattempt_1457363931356_28039_000002 exited
>> with exitCode: 1
>> For more detailed output, check application tracking page:http://<NAMENODE_SRV>:<RM_PORT>/cluster/app/application_1457363931356_28039Then,
>> click on links to logs of each attempt.
>> Diagnostics: Exception from container-launch.
>> Container id - OK fail 0 - :KILL:
>> 2016-03-29 13:38:11 GMT OK 2016-03-29 13:38:11 GMT
>>
>> ------------------------------------------------------------------------------------------------------------------------------------
>> ###
>>
>> And when I check the app attempt, here is what I found :
>> ###
>> Container: container_e19_1457363931356_28039_01_000001 on <DATANODE>_45454
>>
>> ==================================================================================================
>> LogType:stderr
>> Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
>> LogLength:77
>> Log Contents:
>> Error: Could not find or load main class
>> org.apache.tez.dag.app.DAGAppMaster
>> End of LogType:stderr
>>
>> LogType:stdout
>> Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
>> LogLength:949
>> Log Contents:
>> Heap
>> PSYoungGen total 602112K, used 10322K [0x00000007d5500000,
>> 0x00000007ff500000, 0x0000000800000000)
>> eden space 516096K, 2% used
>> [0x00000007d5500000,0x00000007d5f14898,0x00000007f4d00000)
>> lgrp 0 space 258048K, 4% used
>> [0x00000007d5500000,0x00000007d5f14898,0x00000007e5100000)
>> lgrp 1 space 258048K, 0% used
>> [0x00000007e5100000,0x00000007e5100000,0x00000007f4d00000)
>> from space 86016K, 0% used
>> [0x00000007fa100000,0x00000007fa100000,0x00000007ff500000)
>> to space 86016K, 0% used
>> [0x00000007f4d00000,0x00000007f4d00000,0x00000007fa100000)
>> ParOldGen total 1375232K, used 0K [0x0000000780000000,
>> 0x00000007d3f00000, 0x00000007d5500000)
>> object space 1375232K, 0% used
>> [0x0000000780000000,0x0000000780000000,0x00000007d3f00000)
>> PSPermGen total 21504K, used 2874K [0x0000000775a00000,
>> 0x0000000776f00000, 0x0000000780000000)
>> object space 21504K, 13% used
>> [0x0000000775a00000,0x0000000775cce858,0x0000000776f00000)
>> End of LogType:stdout
>> ###
>>
>> So this is the error that blocks my workflow:
>> ###
>> Could not find or load main class org.apache.tez.dag.app.DAGAppMaster
>> ###
>>
>> I don't understand why I have this error message, because I set the
>> tez.lib.uris in my workflow.xml.
>>
>> In the path for Tez libs, I have a tar.gz file containing all the
>> necessary jars, such as the tez-dag jar, which contains this class.
>>
>> So I am sending this mail to ask how to use Tez as the execution engine
>> in an Oozie workflow. Could someone help me, please?
>>
>> Best regards.
>>
>> Morgrim.
>>
>
>
Re: Hive action in oozie using tez engine
Posted by Morgrim Murdargent <mo...@gmail.com>.
As a note, I don't have this error when oozie is out of the equation.
Best regards.
Hope you can help me.
Morgrim.
On Tue, Mar 29, 2016 at 4:41 PM, Morgrim Murdargent <morgrim.oozie@gmail.com> wrote:
> Hello !
>
> I have a very simple workflow, using a database to create a table.
>
> Here is my property file (I replaced the real values for security reasons):
> ###
> #
> ------------------------------------------------------------------------------
> # Environment
> #
> ------------------------------------------------------------------------------
> nameNode=hdfs://<NAMENODE>:8020
> jobTracker=<RESOURCE MANAGER>:8050
> kerberosRealm=<KERBEROS REALM>
> queueName=<myqueue>
> hiveMetastoreHost=<HIVEMETASTORE_HOST>
>
> #
> ------------------------------------------------------------------------------
> # Application
> #
> ------------------------------------------------------------------------------
> appRoot=${nameNode}/tmp/test
> oozie.wf.application.path=${appRoot}/hive.xml
>
> #
> ------------------------------------------------------------------------------
> # Oozie
> #
> ------------------------------------------------------------------------------
> oozie.use.system.libpath=true
> oozie.wf.rerun.failnodes=true
>
> #
> ------------------------------------------------------------------------------
> # Kerberos
> #
> ------------------------------------------------------------------------------
> hcatMetastoreUri=thrift://${hiveMetastoreHost}:9083
> hcatMetastorePrincipal=hive/_HOST@${kerberosRealm}
> hiveConfPath=${appRoot}/hive-site.xml
>
> #
> ------------------------------------------------------------------------------
> # Tez
> #
> ------------------------------------------------------------------------------
> tezLibPath=${nameNode}/hdp/apps/2.3.2.0-2950/tez/
> ###
>
> Here is the xml file for the workflow :
> ###
> <workflow-app xmlns="uri:oozie:workflow:0.5" name="TEST_HIVE_ACTION">
> <credentials>
> <credential name="hive_credentials" type="hcat">
> <property>
> <name>hcat.metastore.uri</name>
> <value>${hcatMetastoreUri}</value>
> </property>
> <property>
> <name>hcat.metastore.principal</name>
> <value>${hcatMetastorePrincipal}</value>
> </property>
> </credential>
> </credentials>
> <start to="init-hive" />
> <action name="init-hive" cred="hive_credentials">
> <hive xmlns="uri:oozie:hive-action:0.2">
> <job-tracker>${jobTracker}</job-tracker>
> <name-node>${nameNode}</name-node>
> <job-xml>${hiveConfPath}</job-xml>
> <configuration>
> <property>
> <name>oozie.hive.defaults</name>
> <value>${hiveConfPath}</value>
> </property>
> <property>
> <name>hive.execution.engine</name>
> <value>tez</value>
> </property>
> <property>
> <name>mapreduce.job.queuename</name>
> <value>${queueName}</value>
> </property>
> <property>
> <name>tez.queue.name</name>
> <value>${queueName}</value>
> </property>
> <property>
> <name>tez.lib.uris</name>
> <value>${tezLibPath}</value>
> </property>
> </configuration>
> <script>hive.hql</script>
> <file>hive.hql#hive.hql</file>
> </hive>
> <ok to="end"/>
> <error to="fail"/>
> </action>
>
> <kill name="fail">
> <message>Script failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
> </kill>
>
> <end name="end"/>
> </workflow-app>
> ###
>
> Here is the hql file (very simple, it is just for the test ;-)) :
> ###
> use mabdd;
> create table if not exists test_tez_via_oozie (test string);
> ###
>
> Here is the hive-site.xml
> ###
> <configuration>
>
> <property>
> <name>ambari.hive.db.schema.name</name>
> <value>hive</value>
> </property>
>
> <property>
> <name>datanucleus.autoCreateSchema</name>
> <value>false</value>
> </property>
>
> <property>
> <name>datanucleus.cache.level2.type</name>
> <value>none</value>
> </property>
>
> <property>
> <name>fs.file.impl.disable.cache</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.auto.convert.join</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.auto.convert.join.noconditionaltask</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.auto.convert.join.noconditionaltask.size</name>
> <value>999999668</value>
> </property>
>
> <property>
> <name>hive.auto.convert.sortmerge.join</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.cbo.enable</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.cli.print.header</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.cluster.delegation.token.store.class</name>
> <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
> </property>
>
> <property>
>
> <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
> <value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
> </property>
>
> <property>
> <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
> <value>/hive/cluster/delegation</value>
> </property>
>
> <property>
> <name>hive.compactor.abortedtxn.threshold</name>
> <value>1000</value>
> </property>
>
> <property>
> <name>hive.compactor.check.interval</name>
> <value>300L</value>
> </property>
>
> <property>
> <name>hive.compactor.delta.num.threshold</name>
> <value>10</value>
> </property>
>
> <property>
> <name>hive.compactor.delta.pct.threshold</name>
> <value>0.1f</value>
> </property>
>
> <property>
> <name>hive.compactor.initiator.on</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.compactor.worker.threads</name>
> <value>0</value>
> </property>
>
> <property>
> <name>hive.compactor.worker.timeout</name>
> <value>86400L</value>
> </property>
>
> <property>
> <name>hive.compute.query.using.stats</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.conf.restricted.list</name>
>
> <value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value>
> </property>
>
> <property>
> <name>hive.convert.join.bucket.mapjoin.tez</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.default.fileformat</name>
> <value>TextFile</value>
> </property>
>
> <property>
> <name>hive.default.fileformat.managed</name>
> <value>TextFile</value>
> </property>
>
> <property>
> <name>hive.enforce.bucketing</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.enforce.sorting</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.enforce.sortmergebucketmapjoin</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.exec.compress.intermediate</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.exec.compress.output</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.exec.dynamic.partition</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.exec.dynamic.partition.mode</name>
> <value>strict</value>
> </property>
>
> <property>
> <name>hive.exec.failure.hooks</name>
> <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
> </property>
>
> <property>
> <name>hive.exec.max.created.files</name>
> <value>100000</value>
> </property>
>
> <property>
> <name>hive.exec.max.dynamic.partitions</name>
> <value>5000</value>
> </property>
>
> <property>
> <name>hive.exec.max.dynamic.partitions.pernode</name>
> <value>2000</value>
> </property>
>
> <property>
> <name>hive.exec.orc.compression.strategy</name>
> <value>SPEED</value>
> </property>
>
> <property>
> <name>hive.exec.orc.default.compress</name>
> <value>ZLIB</value>
> </property>
>
> <property>
> <name>hive.exec.orc.default.stripe.size</name>
> <value>67108864</value>
> </property>
>
> <property>
> <name>hive.exec.orc.encoding.strategy</name>
> <value>SPEED</value>
> </property>
>
> <property>
> <name>hive.exec.parallel</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.exec.parallel.thread.number</name>
> <value>8</value>
> </property>
>
> <property>
> <name>hive.exec.post.hooks</name>
> <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
> </property>
>
> <property>
> <name>hive.exec.pre.hooks</name>
> <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
> </property>
>
> <property>
> <name>hive.exec.reducers.bytes.per.reducer</name>
> <value>67108864</value>
> </property>
>
> <property>
> <name>hive.exec.reducers.max</name>
> <value>1009</value>
> </property>
>
> <property>
> <name>hive.exec.scratchdir</name>
> <value>/tmp/hive</value>
> </property>
>
> <property>
> <name>hive.exec.submit.local.task.via.child</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.exec.submitviachild</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.execution.engine</name>
> <value>mr</value>
> </property>
>
> <property>
> <name>hive.fetch.task.aggr</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.fetch.task.conversion</name>
> <value>more</value>
> </property>
>
> <property>
> <name>hive.fetch.task.conversion.threshold</name>
> <value>1073741824</value>
> </property>
>
> <property>
> <name>hive.limit.optimize.enable</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.limit.pushdown.memory.usage</name>
> <value>0.04</value>
> </property>
>
> <property>
> <name>hive.map.aggr</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
> <value>0.9</value>
> </property>
>
> <property>
> <name>hive.map.aggr.hash.min.reduction</name>
> <value>0.5</value>
> </property>
>
> <property>
> <name>hive.map.aggr.hash.percentmemory</name>
> <value>0.5</value>
> </property>
>
> <property>
> <name>hive.mapjoin.bucket.cache.size</name>
> <value>10000</value>
> </property>
>
> <property>
> <name>hive.mapjoin.optimized.hashtable</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.mapred.reduce.tasks.speculative.execution</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.merge.mapfiles</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.merge.mapredfiles</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.merge.orcfile.stripe.level</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.merge.rcfile.block.level</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.merge.size.per.task</name>
> <value>256000000</value>
> </property>
>
> <property>
> <name>hive.merge.smallfiles.avgsize</name>
> <value>16000000</value>
> </property>
>
> <property>
> <name>hive.merge.tezfiles</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.metastore.authorization.storage.checks</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.metastore.cache.pinobjtypes</name>
> <value>Table,Database,Type,FieldSchema,Order</value>
> </property>
>
> <property>
> <name>hive.metastore.client.connect.retry.delay</name>
> <value>5s</value>
> </property>
>
> <property>
> <name>hive.metastore.client.socket.timeout</name>
> <value>1800s</value>
> </property>
>
> <property>
> <name>hive.metastore.connect.retries</name>
> <value>24</value>
> </property>
>
> <property>
> <name>hive.metastore.execute.setugi</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.metastore.failure.retries</name>
> <value>24</value>
> </property>
>
> <property>
> <name>hive.metastore.kerberos.keytab.file</name>
> <value>/etc/security/keytabs/hive.service.keytab</value>
> </property>
>
> <property>
> <name>hive.metastore.kerberos.principal</name>
> <value>hive/_HOST@<KRB_REALM></value>
> </property>
>
> <property>
> <name>hive.metastore.pre.event.listeners</name>
>
> <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
> </property>
>
> <property>
> <name>hive.metastore.sasl.enabled</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.metastore.server.max.threads</name>
> <value>100000</value>
> </property>
>
> <property>
> <name>hive.metastore.uris</name>
> <value>thrift://<db_srv>:9083</value>
> </property>
>
> <property>
> <name>hive.metastore.warehouse.dir</name>
> <value>/apps/hive/warehouse</value>
> </property>
>
> <property>
> <name>hive.optimize.bucketmapjoin</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.optimize.bucketmapjoin.sortedmerge</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.optimize.constant.propagation</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.optimize.index.filter</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.optimize.metadataonly</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.optimize.null.scan</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.optimize.reducededuplication</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.optimize.reducededuplication.min.reducer</name>
> <value>1</value>
> </property>
>
> <property>
> <name>hive.optimize.sort.dynamic.partition</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.orc.compute.splits.num.threads</name>
> <value>10</value>
> </property>
>
> <property>
> <name>hive.orc.splits.include.file.footer</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.prewarm.enabled</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.prewarm.numcontainers</name>
> <value>3</value>
> </property>
>
> <property>
> <name>hive.security.authenticator.manager</name>
>
> <value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
> </property>
>
> <property>
> <name>hive.security.authorization.enabled</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.security.authorization.manager</name>
>
> <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
> </property>
>
> <property>
> <name>hive.security.metastore.authenticator.manager</name>
>
> <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
> </property>
>
> <property>
> <name>hive.security.metastore.authorization.auth.reads</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.security.metastore.authorization.manager</name>
>
> <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
> </property>
>
> <property>
> <name>hive.server2.allow.user.substitution</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.server2.authentication</name>
> <value>KERBEROS</value>
> </property>
>
> <property>
> <name>hive.server2.authentication.kerberos.keytab</name>
> <value>/etc/security/keytabs/hive.service.keytab</value>
> </property>
>
> <property>
> <name>hive.server2.authentication.kerberos.principal</name>
> <value>hive/_HOST@<KRB_REALM></value>
> </property>
>
> <property>
> <name>hive.server2.authentication.spnego.keytab</name>
> <value>/etc/security/keytabs/spnego.service.keytab</value>
> </property>
>
> <property>
> <name>hive.server2.authentication.spnego.principal</name>
> <value>HTTP/_HOST@<KRB_REALM></value>
> </property>
>
> <property>
> <name>hive.server2.enable.doAs</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.server2.keystore.password</name>
> <value>hive_server2</value>
> </property>
>
> <property>
> <name>hive.server2.keystore.path</name>
> <value>/etc/hive/keystores/hive2.jks</value>
> </property>
>
> <property>
> <name>hive.server2.logging.operation.enabled</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.server2.logging.operation.log.location</name>
> <value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
> </property>
>
> <property>
> <name>hive.server2.support.dynamic.service.discovery</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.server2.table.type.mapping</name>
> <value>CLASSIC</value>
> </property>
>
> <property>
> <name>hive.server2.tez.default.queues</name>
> <value>default</value>
> </property>
>
> <property>
> <name>hive.server2.tez.initialize.default.sessions</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.server2.tez.sessions.per.default.queue</name>
> <value>1</value>
> </property>
>
> <property>
> <name>hive.server2.thrift.http.path</name>
> <value>cliservice</value>
> </property>
>
> <property>
> <name>hive.server2.thrift.http.port</name>
> <value>10001</value>
> </property>
>
> <property>
> <name>hive.server2.thrift.max.worker.threads</name>
> <value>500</value>
> </property>
>
> <property>
> <name>hive.server2.thrift.port</name>
> <value>10000</value>
> </property>
>
> <property>
> <name>hive.server2.thrift.sasl.qop</name>
> <value>auth</value>
> </property>
>
> <property>
> <name>hive.server2.transport.mode</name>
> <value>binary</value>
> </property>
>
> <property>
> <name>hive.server2.use.SSL</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.server2.zookeeper.namespace</name>
> <value>hiveserver2</value>
> </property>
>
> <property>
> <name>hive.smbjoin.cache.rows</name>
> <value>10000</value>
> </property>
>
> <property>
> <name>hive.stats.autogather</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.stats.dbclass</name>
> <value>fs</value>
> </property>
>
> <property>
> <name>hive.stats.fetch.column.stats</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.stats.fetch.partition.stats</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.support.concurrency</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.tez.auto.reducer.parallelism</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.tez.container.size</name>
> <value>2048</value>
> </property>
>
> <property>
> <name>hive.tez.cpu.vcores</name>
> <value>-1</value>
> </property>
>
> <property>
> <name>hive.tez.dynamic.partition.pruning</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.tez.dynamic.partition.pruning.max.data.size</name>
> <value>104857600</value>
> </property>
>
> <property>
> <name>hive.tez.dynamic.partition.pruning.max.event.size</name>
> <value>1048576</value>
> </property>
>
> <property>
> <name>hive.tez.input.format</name>
> <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
> </property>
>
> <property>
> <name>hive.tez.java.opts</name>
> <value>-server -Xmx1639m -Djava.net.preferIPv4Stack=true
> -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails
> -verbose:gc -XX:+PrintGCTimeStamps</value>
> </property>
>
> <property>
> <name>hive.tez.log.level</name>
> <value>INFO</value>
> </property>
>
> <property>
> <name>hive.tez.max.partition.factor</name>
> <value>2.0</value>
> </property>
>
> <property>
> <name>hive.tez.min.partition.factor</name>
> <value>0.25</value>
> </property>
>
> <property>
> <name>hive.tez.smb.number.waves</name>
> <value>0.5</value>
> </property>
>
> <property>
> <name>hive.txn.manager</name>
> <value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
> </property>
>
> <property>
> <name>hive.txn.max.open.batch</name>
> <value>1000</value>
> </property>
>
> <property>
> <name>hive.txn.timeout</name>
> <value>300</value>
> </property>
>
> <property>
> <name>hive.user.install.directory</name>
> <value>/tmp/</value>
> </property>
>
> <property>
> <name>hive.vectorized.execution.enabled</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.vectorized.execution.reduce.enabled</name>
> <value>false</value>
> </property>
>
> <property>
> <name>hive.vectorized.groupby.checkinterval</name>
> <value>1024</value>
> </property>
>
> <property>
> <name>hive.vectorized.groupby.flush.percent</name>
> <value>0.1</value>
> </property>
>
> <property>
> <name>hive.vectorized.groupby.maxentries</name>
> <value>100000</value>
> </property>
>
> <property>
> <name>hive.warehouse.subdir.inherit.perms</name>
> <value>true</value>
> </property>
>
> <property>
> <name>hive.zookeeper.client.port</name>
> <value>2181</value>
> </property>
>
> <property>
> <name>hive.zookeeper.namespace</name>
> <value>hive_zookeeper_namespace</value>
> </property>
>
> <property>
> <name>hive.zookeeper.quorum</name>
> <value><srv1>:2181,<srv2>:2181,<srv3>:2181</value>
> </property>
>
> <property>
> <name>javax.jdo.option.ConnectionDriverName</name>
> <value>org.postgresql.Driver</value>
> </property>
>
> <property>
> <name>javax.jdo.option.ConnectionURL</name>
> <value>jdbc:postgresql://<db_srv>:<port>/hive</value>
> </property>
>
> <property>
> <name>javax.jdo.option.ConnectionUserName</name>
> <value>hive</value>
> </property>
>
> </configuration>
> ###
>
> When I run the workflow, I get the following error message :
> ###
> # oozie job -oozie=http://<OOZIE_SRV>:<OOZIE_PORT>/oozie -info
> 0004487-160314163828133-oozie-oozi-W -verbose
>
> Job ID : 0004487-160314163828133-oozie-oozi-W
>
> ------------------------------------------------------------------------------------------------------------------------------------
> Workflow Name : TEST_HIVE_ACTION
> App Path : hdfs://<NAMENODE_SRV>:8020/tmp/test/hive.xml
> Status : KILLED
> Run : 0
> User : <myuser>
> Group : -
> Created : 2016-03-29 13:37:46 GMT
> Started : 2016-03-29 13:37:46 GMT
> Last Modified : 2016-03-29 13:38:11 GMT
> Ended : 2016-03-29 13:38:11 GMT
> CoordAction ID: -
>
> Actions
>
> ------------------------------------------------------------------------------------------------------------------------------------
> ID Console URL Error Code Error Message External ID
> External Status Name RetriesTracker URI Type Started Status Ended
>
> ------------------------------------------------------------------------------------------------------------------------------------
> 0004487-160314163828133-oozie-oozi-W@:start: - - -
> - OK :start: 0 -:START: 2016-03-29 13:37:46 GMT OK
> 2016-03-29 13:37:46 GMT
>
> ------------------------------------------------------------------------------------------------------------------------------------
> 0004487-160314163828133-oozie-oozi-W@init-hive http://<NAMENODE_SRV>:8088/proxy/application_1457363931356_28038/
> JA018 org.apache.tez.dag.api.SessionNotRunning: TezSession has already
> shutdown. Application application_1457363931356_28039 failed 2 times due to
> AM Container for appattempt_1457363931356_28039_000002 exited with
> exitCode: 1
> For more detailed output, check application tracking page:http://<NAMENODE_SRV>:8088/cluster/app/application_1457363931356_28039Then,
> click on links to logs of each attempt.
> Diagnostics: Exception from container-launch.
> Container id: container_e19_1457363931356 job_1457363931356_28038
> FAILED/KILLED init-hive 0<NAMENODE_SRV>:8050 hive 2016-03-29
> 13:37:46 GMT ERROR 2016-03-29 13:38:11 GMT
>
> ------------------------------------------------------------------------------------------------------------------------------------
> 0004487-160314163828133-oozie-oozi-W@fail - E0729 Script
> failed, error message[org.apache.tez.dag.api.SessionNotRunning: TezSession
> has already shutdown. Application application_1457363931356_28039 failed 2
> times due to AM Container for appattempt_1457363931356_28039_000002 exited
> with exitCode: 1
> For more detailed output, check application tracking page:http://<NAMENODE_SRV>:<RM_PORT>/cluster/app/application_1457363931356_28039Then,
> click on links to logs of each attempt.
> Diagnostics: Exception from container-launch.
> Container id - OK fail 0 - :KILL: 2016-03-29
> 13:38:11 GMT OK 2016-03-29 13:38:11 GMT
>
> ------------------------------------------------------------------------------------------------------------------------------------
> ###
>
> And when I check the logs of the application attempt, here is what I find :
> ###
> Container: container_e19_1457363931356_28039_01_000001 on <DATANODE>_45454
>
> ==================================================================================================
> LogType:stderr
> Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
> LogLength:77
> Log Contents:
> Error: Could not find or load main class
> org.apache.tez.dag.app.DAGAppMaster
> End of LogType:stderr
>
> LogType:stdout
> Log Upload Time:Tue Mar 29 15:38:10 +0200 2016
> LogLength:949
> Log Contents:
> Heap
> PSYoungGen total 602112K, used 10322K [0x00000007d5500000,
> 0x00000007ff500000, 0x0000000800000000)
> eden space 516096K, 2% used
> [0x00000007d5500000,0x00000007d5f14898,0x00000007f4d00000)
> lgrp 0 space 258048K, 4% used
> [0x00000007d5500000,0x00000007d5f14898,0x00000007e5100000)
> lgrp 1 space 258048K, 0% used
> [0x00000007e5100000,0x00000007e5100000,0x00000007f4d00000)
> from space 86016K, 0% used
> [0x00000007fa100000,0x00000007fa100000,0x00000007ff500000)
> to space 86016K, 0% used
> [0x00000007f4d00000,0x00000007f4d00000,0x00000007fa100000)
> ParOldGen total 1375232K, used 0K [0x0000000780000000,
> 0x00000007d3f00000, 0x00000007d5500000)
> object space 1375232K, 0% used
> [0x0000000780000000,0x0000000780000000,0x00000007d3f00000)
> PSPermGen total 21504K, used 2874K [0x0000000775a00000,
> 0x0000000776f00000, 0x0000000780000000)
> object space 21504K, 13% used
> [0x0000000775a00000,0x0000000775cce858,0x0000000776f00000)
> End of LogType:stdout
> ###
>
> So the following error is what blocks my workflow :
> ###
> Could not find or load main class org.apache.tez.dag.app.DAGAppMaster
> ###
>
> I don't understand why I have this error message, because I set the
> tez.lib.uris in my workflow.xml.
>
> In the path for the Tez libs, I have a tar.gz file containing all the
> necessary jars, including the tez-dag jar that contains this class.
>
> So I am sending this mail to ask how to use Tez as the execution engine in
> an Oozie workflow. Could someone help me, please ?
>
> Best regards.
>
> Morgrim.
>