You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Jacques Nadeau (JIRA)" <ji...@apache.org> on 2015/01/04 22:28:41 UTC

[jira] [Updated] (DRILL-1139) Drillbit fails with OutOfMemoryError Exception when Drill-smoke test is run for a long time

     [ https://issues.apache.org/jira/browse/DRILL-1139?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jacques Nadeau updated DRILL-1139:
----------------------------------
    Priority: Critical  (was: Major)

> Drillbit fails with OutOfMemoryError Exception when Drill-smoke test is run for a long time
> -------------------------------------------------------------------------------------------
>
>                 Key: DRILL-1139
>                 URL: https://issues.apache.org/jira/browse/DRILL-1139
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Execution - Flow
>            Reporter: Amit Katti
>            Assignee: Parth Chandra
>            Priority: Critical
>             Fix For: 0.8.0
>
>
> I ran the Drill-smoke test in an infinite loop on a cluster with 2 drillbits.
> After about 11 hours of running successfully, the smoke test started to fail and both drillbits went down.
> I had also set the following option in the /etc/drill/conf/drill-env.sh file:
> export DRILL_JAVA_OPTS="-Xms$DRILL_INIT_HEAP -Xmx$DRILL_MAX_HEAP -XX:MaxDirectMemorySize=$DRILL_MAX_DIRECT_MEMORY -ea -XX:MaxPermSize=512M -XX:+UseConcMarkSweepGC -XX:ReservedCodeCacheSize=1G -XX:+CMSClassUnloadingEnabled"
> The error message reported by the smoke test was:
> {code}
> 2014-07-12 05:36:34 INFO  ClientCnxn:852 - Socket connection established to 10.10.30.156/10.10.30.156:5181, initiating session
> 2014-07-12 05:36:34 ERROR ConnectionState:201 - Connection timed out for connection string (10.10.30.156:5181) and timeout (5000) / elapsed (5003)
> org.apache.curator.CuratorConnectionLossException: KeeperErrorCode = ConnectionLoss
> 	at org.apache.curator.ConnectionState.checkTimeouts(ConnectionState.java:198)
> 	at org.apache.curator.ConnectionState.getZooKeeper(ConnectionState.java:88)
> 	at org.apache.curator.CuratorZookeeperClient.getZooKeeper(CuratorZookeeperClient.java:115)
> 	at org.apache.curator.utils.EnsurePath$InitialHelper$1.call(EnsurePath.java:148)
> 	at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
> 	at org.apache.curator.utils.EnsurePath$InitialHelper.ensure(EnsurePath.java:140)
> 	at org.apache.curator.utils.EnsurePath.ensure(EnsurePath.java:99)
> 	at org.apache.curator.framework.imps.NamespaceImpl.fixForNamespace(NamespaceImpl.java:74)
> 	at org.apache.curator.framework.imps.NamespaceImpl.newNamespaceAwareEnsurePath(NamespaceImpl.java:87)
> 	at org.apache.curator.framework.imps.CuratorFrameworkImpl.newNamespaceAwareEnsurePath(CuratorFrameworkImpl.java:468)
> 	at org.apache.curator.framework.recipes.cache.PathChildrenCache.<init>(PathChildrenCache.java:223)
> 	at org.apache.curator.framework.recipes.cache.PathChildrenCache.<init>(PathChildrenCache.java:182)
> 	at org.apache.curator.x.discovery.details.ServiceCacheImpl.<init>(ServiceCacheImpl.java:65)
> 	at org.apache.curator.x.discovery.details.ServiceCacheBuilderImpl.build(ServiceCacheBuilderImpl.java:47)
> 	at org.apache.drill.exec.coord.zk.ZKClusterCoordinator.<init>(ZKClusterCoordinator.java:81)
> 	at org.apache.drill.exec.client.DrillClient.connect(DrillClient.java:144)
> 	at org.apache.drill.jdbc.DrillConnectionImpl.<init>(DrillConnectionImpl.java:90)
> 	at org.apache.drill.jdbc.DrillJdbc41Factory$DrillJdbc41Connection.<init>(DrillJdbc41Factory.java:87)
> 	at org.apache.drill.jdbc.DrillJdbc41Factory.newDrillConnection(DrillJdbc41Factory.java:56)
> 	at org.apache.drill.jdbc.DrillJdbc41Factory.newDrillConnection(DrillJdbc41Factory.java:43)
> 	at org.apache.drill.jdbc.DrillFactory.newConnection(DrillFactory.java:51)
> 	at net.hydromatic.avatica.UnregisteredDriver.connect(UnregisteredDriver.java:126)
> 	at java.sql.DriverManager.getConnection(DriverManager.java:571)
> 	at java.sql.DriverManager.getConnection(DriverManager.java:233)
> 	at org.apache.drill.test.framework.DrillTestBase.runTest(DrillTestBase.java:172)
> 	at org.apache.drill.test.framework.DrillTests.positiveTests(DrillTests.java:32)
> 	at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:606)
> 	at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:80)
> 	at org.testng.internal.Invoker.invokeMethod(Invoker.java:701)
> 	at org.testng.internal.Invoker.invokeTestMethod(Invoker.java:893)
> 	at org.testng.internal.Invoker.invokeTestMethods(Invoker.java:1218)
> 	at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:127)
> 	at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:111)
> 	at org.testng.TestRunner.privateRun(TestRunner.java:758)
> 	at org.testng.TestRunner.run(TestRunner.java:613)
> 	at org.testng.SuiteRunner.runTest(SuiteRunner.java:334)
> 	at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:329)
> 	at org.testng.SuiteRunner.privateRun(SuiteRunner.java:291)
> 	at org.testng.SuiteRunner.run(SuiteRunner.java:240)
> 	at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:53)
> 	at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:87)
> 	at org.testng.TestNG.runSuitesSequentially(TestNG.java:1170)
> 	at org.testng.TestNG.runSuitesLocally(TestNG.java:1095)
> 	at org.testng.TestNG.run(TestNG.java:1007)
> 	at org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:70)
> 	at org.apache.maven.surefire.testng.TestNGDirectoryTestSuite.execute(TestNGDirectoryTestSuite.java:102)
> 	at org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:114)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:606)
> 	at org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:103)
> 	at com.sun.proxy.$Proxy0.invoke(Unknown Source)
> 	at org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:150)
> 	at org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:91)
> 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:69)
> {code}
> The exception in the drillbit.log was:
> {code}
> 2014-07-11 02:02:39,506 [5e9e75ae-419a-4aac-a2aa-9c4253563699:foreman] ERROR o.a.drill.exec.work.foreman.Foreman - Error 8c6dffab-e845-4e9e-a75b-60649d64c337: Failure while setting up Foreman.
> java.lang.OutOfMemoryError: PermGen space
> 	at sun.misc.Unsafe.defineClass(Native Method) ~[na:1.7.0_55]
> 	at sun.reflect.ClassDefiner.defineClass(ClassDefiner.java:63) ~[na:1.7.0_55]
> 	at sun.reflect.MethodAccessorGenerator$1.run(MethodAccessorGenerator.java:399) ~[na:1.7.0_55]
> 	at sun.reflect.MethodAccessorGenerator$1.run(MethodAccessorGenerator.java:396) ~[na:1.7.0_55]
> 	at java.security.AccessController.doPrivileged(Native Method) ~[na:1.7.0_55]
> 	at sun.reflect.MethodAccessorGenerator.generate(MethodAccessorGenerator.java:395) ~[na:1.7.0_55]
> 	at sun.reflect.MethodAccessorGenerator.generateConstructor(MethodAccessorGenerator.java:94) ~[na:1.7.0_55]
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:48) ~[na:1.7.0_55]
> 	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) ~[na:1.7.0_55]
> 	at java.lang.reflect.Constructor.newInstance(Constructor.java:526) ~[na:1.7.0_55]
> 	at java.lang.reflect.Proxy.newInstance(Proxy.java:748) ~[na:1.7.0_55]
> 	at java.lang.reflect.Proxy.newProxyInstance(Proxy.java:739) ~[na:1.7.0_55]
> 	at org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider$2.apply(ReflectiveRelMetadataProvider.java:112) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider$2.apply(ReflectiveRelMetadataProvider.java:1) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.rel.metadata.MetadataFactoryImpl.query(MetadataFactoryImpl.java:71) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.rel.AbstractRelNode.metadata(AbstractRelNode.java:269) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.rel.metadata.RelMetadataQuery.getNonCumulativeCost(RelMetadataQuery.java:121) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.getCost(VolcanoPlanner.java:918) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements0(RelSubset.java:333) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements(RelSubset.java:314) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements0(RelSubset.java:349) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements(RelSubset.java:314) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.addRelToSet(VolcanoPlanner.java:1611) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.registerImpl(VolcanoPlanner.java:1549) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.register(VolcanoPlanner.java:829) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.ensureRegistered(VolcanoPlanner.java:852) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.ensureRegistered(VolcanoPlanner.java:1726) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoRuleCall.transformTo(VolcanoRuleCall.java:129) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.RelOptRuleCall.transformTo(RelOptRuleCall.java:210) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.apache.drill.exec.planner.physical.ScanPrule.onMatch(ScanPrule.java:49) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
> 	at org.eigenbase.relopt.volcano.VolcanoRuleCall.onMatch(VolcanoRuleCall.java:221) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 	at org.eigenbase.relopt.volcano.VolcanoPlanner.findBestExp(VolcanoPlanner.java:653) ~[optiq-core-0.7-20140708.001905-9.jar:na]
> 2014-07-11 02:05:24,124 [ShutdownHook] INFO  o.apache.drill.exec.server.Drillbit - Received shutdown request.
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)