You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Oleksiy Sayankin (JIRA)" <ji...@apache.org> on 2018/05/17 16:06:00 UTC
[jira] [Created] (HIVE-19587) HeartBeat thread uses cancelled
delegation token while connecting to meta on KERBEROS cluster
Oleksiy Sayankin created HIVE-19587:
---------------------------------------
Summary: HeartBeat thread uses cancelled delegation token while connecting to meta on KERBEROS cluster
Key: HIVE-19587
URL: https://issues.apache.org/jira/browse/HIVE-19587
Project: Hive
Issue Type: Bug
Reporter: Oleksiy Sayankin
Assignee: Oleksiy Sayankin
*STEP 1. Create test data*
{code}
create table t1 (id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ",";
create table t2 (id int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ",";
{code}
Generate 10 000 000 lines of random data
{code}
package com.test.app;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.concurrent.ThreadLocalRandom;
public class App {
public static void main(String[] args) throws FileNotFoundException {
try (PrintWriter out = new PrintWriter("table.data");) {
int min = 0;
int max = 10_000;
int numRows = 10_000_000;
for (int i = 0; i <= numRows - 1; i++){
int randomNum = ThreadLocalRandom.current().nextInt(min, max + 1);
out.println(randomNum);
}
}
}
}
{code}
Upload data to Hive tables
{code}
load data local inpath '/home/myuser/table.data' into table t1;
load data local inpath '/home/myuser/table.data' into table t2;
{code}
*STEP 2. Configure transactions in hive-site.xml*
{code}
<!-- Transactions -->
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>1</value>
</property>
{code}
*STEP 3. Configure hive.txn.timeout in hive-site.xml*
{code}
<!-- Delegation token issue -->
<property>
<name>hive.txn.timeout</name>
<value>10s</value>
</property>
{code}
*STEP 4. Connect via beeline to HS2 with KERBEROS*
{code}
!connect jdbc:hive2://node8.cluster:10000/default;principal=myuser/node8.cluster@NODE8;ssl=true;sslTrustStore=/opt/myuser/conf/ssl_truststore
{code}
{code}
select count(*) from t1;
{code}
*STEP 5. Close connection and reconnect*
{code}
!close
{code}
{code}
!connect jdbc:hive2://node8.cluster:10000/default;principal=myuser/node8.cluster@NODE8;ssl=true;sslTrustStore=/opt/myuser/conf/ssl_truststore
{code}
*STEP 6. Perform long playing query*
This query lasts about 600s
{code}
select count(*) from t1 join t2 on t1.id = t2.id;
{code}
*EXPECTED RESULT*
Query finishes successfully
*ACTUAL RESULT*
{code}
2018-05-17T13:54:54,921 ERROR [pool-7-thread-10] transport.TSaslTransport: SASL negotiation failure
javax.security.sasl.SaslException: DIGEST-MD5: IO error acquiring password
at com.sun.security.sasl.digest.DigestMD5Server.validateClientResponse(DigestMD5Server.java:598)
at com.sun.security.sasl.digest.DigestMD5Server.evaluateResponse(DigestMD5Server.java:244)
at org.apache.thrift.transport.TSaslTransport$SaslParticipant.evaluateChallengeOrResponse(TSaslTransport.java:539)
at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:283)
at org.apache.thrift.transport.TSaslServerTransport.open(TSaslServerTransport.java:41)
at org.apache.thrift.transport.TSaslServerTransport$Factory.getTransport(TSaslServerTransport.java:216)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory$1.run(HadoopThriftAuthBridge.java:663)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory$1.run(HadoopThriftAuthBridge.java:660)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:360)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1613)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory.getTransport(HadoopThriftAuthBridge.java:660)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:269)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hadoop.security.token.SecretManager$InvalidToken: token expired or does not exist: owner=myuser, renewer=myuser, realUser=, issueDate=1526565229297, maxDate=1527170029297, sequenceNumber=1, masterKeyId=1
at org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager.retrievePassword(TokenStoreDelegationTokenSecretManager.java:104)
at org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager.retrievePassword(TokenStoreDelegationTokenSecretManager.java:56)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$SaslDigestCallbackHandler.getPassword(HadoopThriftAuthBridge.java:472)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$SaslDigestCallbackHandler.handle(HadoopThriftAuthBridge.java:503)
at com.sun.security.sasl.digest.DigestMD5Server.validateClientResponse(DigestMD5Server.java:589)
... 15 more
2018-05-17T13:54:54,966 ERROR [pool-7-thread-10] server.TThreadPoolServer: Error occurred during processing of message.
java.lang.RuntimeException: org.apache.thrift.transport.TTransportException: DIGEST-MD5: IO error acquiring password
at org.apache.thrift.transport.TSaslServerTransport$Factory.getTransport(TSaslServerTransport.java:219)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory$1.run(HadoopThriftAuthBridge.java:663)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory$1.run(HadoopThriftAuthBridge.java:660)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:360)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1613)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory.getTransport(HadoopThriftAuthBridge.java:660)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:269)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.thrift.transport.TTransportException: DIGEST-MD5: IO error acquiring password
at org.apache.thrift.transport.TSaslTransport.sendAndThrowMessage(TSaslTransport.java:232)
at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:316)
at org.apache.thrift.transport.TSaslServerTransport.open(TSaslServerTransport.java:41)
at org.apache.thrift.transport.TSaslServerTransport$Factory.getTransport(TSaslServerTransport.java:216)
... 10 more
{code}
*ROOT-CAUSE*
HS2 uses cancelled delegation token to connect to metastore. From logs
1. Metastore creates delegation token (issueDate=1526565229297)
{code}
2018-05-17T13:53:49,297 INFO [pool-7-thread-3] delegation.AbstractDelegationTokenSecretManager: Creating password for identifier: owner=myuser, renewer=myuser, realUser=, issueDate=1526565229297, m$
2018-05-17T13:53:50,789 INFO [Thread-18] txn.TxnHandler: 'Cleaner' locked by 'node8.cluster'
2018-05-17T13:53:50,793 INFO [Thread-18] txn.TxnHandler: 'Cleaner' unlocked by 'node8.cluster'
2018-05-17T13:53:54,840 INFO [pool-7-thread-4] metastore.HiveMetaStore: 3: Opening raw store with implementation class:org.apache.hadoop.hive.metastore.ObjectStore
{code}
2. Metastore cancels delegation token (issueDate=1526565229297)
{code}
2018-05-17T13:54:32,380 INFO [pool-7-thread-4] thrift.TokenStoreDelegationTokenSecretManager: Token cancelation requested for identifier: owner=mapr, renewer=mapr, realUser=, issueDate=1526565229297, maxDate=1527170029297,$
2018-05-17T13:54:32,416 INFO [pool-7-thread-4] metastore.HiveMetaStore: 3: Cleaning up thread local RawStore...
{code}
3. HS2 uses canceled delegation token and crashes (issueDate=1526565229297):
{code}
2018-05-17T13:54:54,921 ERROR [pool-7-thread-10] transport.TSaslTransport: SASL negotiation failure
javax.security.sasl.SaslException: DIGEST-MD5: IO error acquiring password
at com.sun.security.sasl.digest.DigestMD5Server.validateClientResponse(DigestMD5Server.java:598)
at com.sun.security.sasl.digest.DigestMD5Server.evaluateResponse(DigestMD5Server.java:244)
at org.apache.thrift.transport.TSaslTransport$SaslParticipant.evaluateChallengeOrResponse(TSaslTransport.java:539)
at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:283)
at org.apache.thrift.transport.TSaslServerTransport.open(TSaslServerTransport.java:41)
at org.apache.thrift.transport.TSaslServerTransport$Factory.getTransport(TSaslServerTransport.java:216)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory$1.run(HadoopThriftAuthBridge.java:663)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory$1.run(HadoopThriftAuthBridge.java:660)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:360)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1613)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory.getTransport(HadoopThriftAuthBridge.java:660)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:269)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hadoop.security.token.SecretManager$InvalidToken: token expired or does not exist: owner=myuser, renewer=myuser, realUser=, issueDate=1526565229297, maxDate=1527170029297, sequenceNumber=1, masterKeyId=1
at org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager.retrievePassword(TokenStoreDelegationTokenSecretManager.java:104)
{code}
*WORKAROUND*
From Hive sources:
{code}
heartBeatInterval = HIVE_TXN_TIMEOUT / 2.
{code}
So to skip HB during job one needs to set
{code}
HIVE_TXN_TIMEOUT = (max_job_duration * 2) * 1.25.
{code}
Here I added 1.25 factor to be sure there is no HB. In my case max_job_duration = ~600s, then
{code}
HIVE_TXN_TIMEOUT = 600s * 2 * 1.25 = 1500s
{code}
When I {{set hive.txn.timeout}} = 1500s issue was gone on my local env.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)