Posted to user@hive.apache.org by Anurag Phadke <ap...@mozilla.com> on 2010/08/02 22:14:36 UTC

too many open files

We are running the Hive server with MySQL as the metastore. The server
receives a LOAD DATA HQL statement every 30 seconds (from Flume). Everything
runs fine for about 90 minutes, and then the Hive server throws the following
error. (Note: the user running the Hive server has an "unlimited" file
handle limit.)

Any ideas?

10/08/02 12:34:34 ERROR exec.MoveTask: Failed with exception Unable to fetch table logs_temp
org.apache.hadoop.hive.ql.metadata.HiveException: Unable to fetch table logs_temp
	at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:339)
	at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:310)
	at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:454)
	at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:135)
	at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:379)
	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:285)
	at org.apache.hadoop.hive.service.HiveServer$HiveServerHandler.execute(HiveServer.java:115)
	at org.apache.hadoop.hive.service.ThriftHive$Processor$execute.process(ThriftHive.java:340)
	at org.apache.hadoop.hive.service.ThriftHive$Processor.process(ThriftHive.java:328)
	at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:252)
	at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
	at java.lang.Thread.run(Thread.java:619)
Caused by: javax.jdo.JDODataStoreException: Communications link failure due to underlying exception:

** BEGIN NESTED EXCEPTION **

java.net.SocketException
MESSAGE: Too many open files

STACKTRACE:

java.net.SocketException: Too many open files
	at java.net.Socket.createImpl(Socket.java:397)
	at java.net.Socket.<init>(Socket.java:371)
	at java.net.Socket.<init>(Socket.java:218)
	at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:256)
	at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:271)
	at com.mysql.jdbc.Connection.createNewIO(Connection.java:2771)
	at com.mysql.jdbc.Connection.<init>(Connection.java:1555)
	at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:285)
	at org.datanucleus.store.rdbms.datasource.DriverManagerDataSource.getConnection(DriverManagerDataSource.java:142)
	at org.datanucleus.store.rdbms.datasource.DriverManagerDataSource.getConnection(DriverManagerDataSource.java:118)
	at org.datanucleus.store.rdbms.ConnectionProviderPriorityList.getConnection(ConnectionProviderPriorityList.java:59)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:458)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getXAResource(ConnectionFactoryImpl.java:418)
	at org.datanucleus.ConnectionManagerImpl.enlistResource(ConnectionManagerImpl.java:329)
	at org.datanucleus.ConnectionManagerImpl.allocateConnection(ConnectionManagerImpl.java:210)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl.getConnection(ConnectionFactoryImpl.java:345)
	at org.datanucleus.store.AbstractStoreManager.getConnection(AbstractStoreManager.java:267)
	at org.datanucleus.store.rdbms.query.SQLEvaluator.evaluate(SQLEvaluator.java:91)
	at org.datanucleus.store.rdbms.query.JDOQLQuery.performExecute(JDOQLQuery.java:287)
	at org.datanucleus.store.query.Query.executeQuery(Query.java:1475)
	at org.datanucleus.store.rdbms.query.JDOQLQuery.executeQuery(JDOQLQuery.java:244)
	at org.datanucleus.store.query.Query.executeWithArray(Query.java:1357)
	at org.datanucleus.jdo.JDOQuery.execute(JDOQuery.java:265)
	at org.apache.hadoop.hive.metastore.ObjectStore.getMTable(ObjectStore.java:554)
	at org.apache.hadoop.hive.metastore.ObjectStore.getTable(ObjectStore.java:497)
	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_table(HiveMetaStore.java:388)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getTable(HiveMetaStoreClient.java:431)
	at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:331)
	at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:310)
	at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:454)
	at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:135)
	at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:379)
	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:285)
	at org.apache.hadoop.hive.service.HiveServer$HiveServerHandler.execute(HiveServer.java:115)
	at org.apache.hadoop.hive.service.ThriftHive$Processor$execute.process(ThriftHive.java:340)
	at org.apache.hadoop.hive.service.ThriftHive$Processor.process(ThriftHive.java:328)
	at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:252)
	at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
	at java.lang.Thread.run(Thread.java:619)

** END NESTED EXCEPTION **

Last packet sent to the server was 1 ms ago.
NestedThrowables:
com.mysql.jdbc.CommunicationsException: Communications link failure due to underlying exception:

** BEGIN NESTED EXCEPTION **

java.net.SocketException
MESSAGE: Too many open files

STACKTRACE:

java.net.SocketException: Too many open files
	at java.net.Socket.createImpl(Socket.java:397)
	at java.net.Socket.<init>(Socket.java:371)
	at java.net.Socket.<init>(Socket.java:218)
	at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:256)
	at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:271)
	at com.mysql.jdbc.Connection.createNewIO(Connection.java:2771)
	at com.mysql.jdbc.Connection.<init>(Connection.java:1555)
	at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:285)
	at org.datanucleus.store.rdbms.datasource.DriverManagerDataSource.getConnection(DriverManagerDataSource.java:142)
	at org.datanucleus.store.rdbms.datasource.DriverManagerDataSource.getConnection(DriverManagerDataSource.java:118)
	at org.datanucleus.store.rdbms.ConnectionProviderPriorityList.getConnection(ConnectionProviderPriorityList.java:59)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:458)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getXAResource(ConnectionFactoryImpl.java:418)
	at org.datanucleus.ConnectionManagerImpl.enlistResource(ConnectionManagerImpl.java:329)
	at org.datanucleus.ConnectionManagerImpl.allocateConnection(ConnectionManagerImpl.java:210)
	at org.datanucleus.store.rdbms.ConnectionFactoryImpl.getConnection(ConnectionFactoryImpl.java:345)
	at org.datanucleus.store.AbstractStoreManager.getConnection(AbstractStoreManager.java:267)
	at org.datanucleus.store.rdbms.query.SQLEvaluator.evaluate(SQLEvaluator.java:91)
	at org.datanucleus.store.rdbms.query.JDOQLQuery.performExecute(JDOQLQuery.java:287)
	at org.datanucleus.store.query.Query.executeQuery

Re: too many open files

Posted by Anurag Phadke <ap...@mozilla.com>.
Filed at https://issues.apache.org/jira/browse/HIVE-1508

On 8/2/10 2:05 PM, Carl Steinbach wrote:
> the hive_job_log_* files are created by the HiveHistory class. This
> class creates a PrintWriter for writing to the file, but never closes
> the writer. It looks like we need to add a cleanup method to HiveHistory
> that closes the PrintWriter and does any other necessary cleanup. Can
> you please file a bug for this issue?

Re: too many open files

Posted by Carl Steinbach <ca...@cloudera.com>.
Hi Anurag,

The hive_job_log_* files are created by the HiveHistory class. This class
creates a PrintWriter for writing to the file, but never closes the writer.
It looks like we need to add a cleanup method to HiveHistory that closes the
PrintWriter and does any other necessary cleanup. Can you please file a bug
for this issue?
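
For illustration, here is a minimal sketch of what such a cleanup hook could
look like (the field and method names below are hypothetical; HiveHistory's
actual internals may differ):

    import java.io.PrintWriter;

    public class HiveHistory {
        // Hypothetical field; assumed to hold the PrintWriter that the
        // constructor opens for /tmp/<user>/hive_job_log_*.txt.
        private PrintWriter histStream;

        /**
         * Proposed cleanup method: close the per-session log writer so
         * its file descriptor is released when the Hive session ends.
         */
        public void closeStream() {
            if (histStream != null) {
                histStream.close(); // flushes buffered output, frees the FD
                histStream = null;  // guards against double-close
            }
        }
    }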

Thanks.

Carl

On Mon, Aug 2, 2010 at 1:51 PM, Anurag Phadke <ap...@mozilla.com> wrote:

> Hi Carl,
> Thanks for the reply. I am currently monitoring file handles via:
> /usr/sbin/lsof | grep hive | wc -l
> The number is fairly stable, but spikes intermittently with the following
> files that get created:
> /tmp/aphadke/hive_job_log_aphadke_201008021315_656095069.txt
> and so on, mainly the pattern below:
> /tmp/aphadke/hive_job_log_*
>
> Sample content:
> SessionStart SESSION_ID="aphadke_201008021315" TIME="1280780118654"
> QueryStart QUERY_STRING="LOAD DATA INPATH 'hdfs://
> cm-hadoop01.mozilla.org/flume/cm-hadoop01.mozilla.org/2010-08-02/1315/hadooplog.00000018.20100802-131516958-0700.7823351645004687.seq.gz'
> INTO TABLE logs_temp PARTITION (ds='2010-08-02', ts='13-15', hn='
> cm-hadoop01.mozilla.org')" QUERY_ID="aphadke_20100802131515"
> TIME="1280780147562"
> TaskStart TASK_NAME="org.apache.hadoop.hive.ql.exec.MoveTask"
> TASK_ID="Stage-0" QUERY_ID="aphadke_20100802131515" TIME="1280780147564"
> TaskEnd TASK_RET_CODE="0"
> TASK_NAME="org.apache.hadoop.hive.ql.exec.MoveTask" TASK_ID="Stage-0"
> QUERY_ID="aphadke_20100802131515" TIME="1280780147902"
> QueryEnd QUERY_STRING="LOAD DATA INPATH 'hdfs://
> cm-hadoop01.mozilla.org/flume/cm-hadoop01.mozilla.org/2010-08-02/1315/hadooplog.00000018.20100802-131516958-0700.7823351645004687.seq.gz'
> INTO TABLE logs_temp PARTITION (ds='2010-08-02', ts='13-15', hn='
> cm-hadoop01.mozilla.org')" QUERY_ID="aphadke_20100802131515"
> QUERY_RET_CODE="0" QUERY_NUM_TASKS="0" TIME="1280780147902"
>
>
> These files and their content look innocuous as they seem to contain
> session information, any idea on why we need them and if its possible to
> reduce the number of open files?
>
> -Anurag
>
>
> On 8/2/10 1:37 PM, Carl Steinbach wrote:
>
>> Hi Anurag,
>>
>> It looks like the MetaStore server is not properly closing JDBC
>> connections, or connection pooling is not working properly. As a first
>> step I would try to verify that process has lots of open sockets using
>> the output of lsof: 'lsof -i -p <jvm process id>'
>>
>> Thanks.
>>
>> Carl
>>
>> On Mon, Aug 2, 2010 at 1:14 PM, Anurag Phadke <aphadke@mozilla.com> wrote:
>>
>>    [original message and stack trace snipped; see the top of this thread]
>>

Re: too many open files

Posted by Anurag Phadke <ap...@mozilla.com>.
Hi Carl,
Thanks for the reply. I am currently monitoring file handles via:
/usr/sbin/lsof | grep hive | wc -l
The number is fairly stable, but it spikes intermittently as files like the
following get created:
/tmp/aphadke/hive_job_log_aphadke_201008021315_656095069.txt
and so on, mainly the pattern below:
/tmp/aphadke/hive_job_log_*

Sample content:
SessionStart SESSION_ID="aphadke_201008021315" TIME="1280780118654"
QueryStart QUERY_STRING="LOAD DATA INPATH 'hdfs://cm-hadoop01.mozilla.org/flume/cm-hadoop01.mozilla.org/2010-08-02/1315/hadooplog.00000018.20100802-131516958-0700.7823351645004687.seq.gz' INTO TABLE logs_temp PARTITION (ds='2010-08-02', ts='13-15', hn='cm-hadoop01.mozilla.org')" QUERY_ID="aphadke_20100802131515" TIME="1280780147562"
TaskStart TASK_NAME="org.apache.hadoop.hive.ql.exec.MoveTask" TASK_ID="Stage-0" QUERY_ID="aphadke_20100802131515" TIME="1280780147564"
TaskEnd TASK_RET_CODE="0" TASK_NAME="org.apache.hadoop.hive.ql.exec.MoveTask" TASK_ID="Stage-0" QUERY_ID="aphadke_20100802131515" TIME="1280780147902"
QueryEnd QUERY_STRING="LOAD DATA INPATH 'hdfs://cm-hadoop01.mozilla.org/flume/cm-hadoop01.mozilla.org/2010-08-02/1315/hadooplog.00000018.20100802-131516958-0700.7823351645004687.seq.gz' INTO TABLE logs_temp PARTITION (ds='2010-08-02', ts='13-15', hn='cm-hadoop01.mozilla.org')" QUERY_ID="aphadke_20100802131515" QUERY_RET_CODE="0" QUERY_NUM_TASKS="0" TIME="1280780147902"


These files and their content look innocuous, as they seem to contain
session information. Any idea why we need them, and whether it's possible to
reduce the number of open files?

-Anurag

On 8/2/10 1:37 PM, Carl Steinbach wrote:
> Hi Anurag,
>
> It looks like the MetaStore server is not properly closing JDBC
> connections, or connection pooling is not working properly. As a first
> step I would try to verify that process has lots of open sockets using
> the output of lsof: 'lsof -i -p <jvm process id>'
>
> Thanks.
>
> Carl
>
> On Mon, Aug 2, 2010 at 1:14 PM, Anurag Phadke <aphadke@mozilla.com> wrote:
>
>     [original message and stack trace snipped; see the top of this thread]
>

Re: too many open files

Posted by Carl Steinbach <ca...@cloudera.com>.
Hi Anurag,

It looks like the MetaStore server is not properly closing JDBC connections,
or connection pooling is not working properly. As a first step, I would try
to verify that the process has lots of open sockets using the output of
lsof: 'lsof -i -p <jvm process id>'
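
As a quick complementary check from inside the JVM itself, you can count the
entries in /proc/self/fd (a sketch, assuming Linux, where that directory
lists the process's open descriptors):

    import java.io.File;

    public class FdCount {
        public static void main(String[] args) {
            // Each entry in /proc/self/fd is one open file descriptor
            // (regular files, sockets, pipes) owned by this JVM process.
            String[] fds = new File("/proc/self/fd").list();
            System.out.println("open fds: "
                    + (fds == null ? "unknown (not Linux?)"
                                   : String.valueOf(fds.length)));
        }
    }

Running it periodically (or calling the same logic from a monitoring thread)
should show the count climbing if a descriptor leak is in play.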

Thanks.

Carl

On Mon, Aug 2, 2010 at 1:14 PM, Anurag Phadke <ap...@mozilla.com> wrote:

> [original message and stack trace snipped; see the top of this thread]