You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by "Dian Fu (Jira)" <ji...@apache.org> on 2020/10/05 00:18:00 UTC

[jira] [Commented] (FLINK-19458) ZooKeeperLeaderElectionITCase.testJobExecutionOnClusterWithLeaderChange: ZooKeeper unexpectedly modified

    [ https://issues.apache.org/jira/browse/FLINK-19458?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17207789#comment-17207789 ] 

Dian Fu commented on FLINK-19458:
---------------------------------

Another instance of this test failure: https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=7184&view=logs&j=39d5b1d5-3b41-54dc-6458-1e2ddd1cdcf3&t=a99e99c7-21cd-5a1f-7274-585e62b72f56

> ZooKeeperLeaderElectionITCase.testJobExecutionOnClusterWithLeaderChange: ZooKeeper unexpectedly modified
> --------------------------------------------------------------------------------------------------------
>
>                 Key: FLINK-19458
>                 URL: https://issues.apache.org/jira/browse/FLINK-19458
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Coordination
>    Affects Versions: 1.12.0
>            Reporter: Robert Metzger
>            Priority: Critical
>              Labels: test-stability
>             Fix For: 1.12.0
>
>
> https://dev.azure.com/rmetzger/Flink/_build/results?buildId=8422&view=logs&j=70ad9b63-500e-5dc9-5a3c-b60356162d7e&t=944c7023-8984-5aa2-b5f8-54922bd90d3a
> {code}
> 2020-09-29T13:34:18.1803081Z [ERROR] testJobExecutionOnClusterWithLeaderChange(org.apache.flink.test.runtime.leaderelection.ZooKeeperLeaderElectionITCase)  Time elapsed: 23.524 s  <<< ERROR!
> 2020-09-29T13:34:18.1803707Z java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit job.
> 2020-09-29T13:34:18.1804343Z 	at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> 2020-09-29T13:34:18.1804738Z 	at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
> 2020-09-29T13:34:18.1805274Z 	at org.apache.flink.test.runtime.leaderelection.ZooKeeperLeaderElectionITCase.testJobExecutionOnClusterWithLeaderChange(ZooKeeperLeaderElectionITCase.java:117)
> 2020-09-29T13:34:18.1805772Z 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 2020-09-29T13:34:18.1806136Z 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 2020-09-29T13:34:18.1806555Z 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2020-09-29T13:34:18.1806936Z 	at java.lang.reflect.Method.invoke(Method.java:498)
> 2020-09-29T13:34:18.1807313Z 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 2020-09-29T13:34:18.1807731Z 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 2020-09-29T13:34:18.1808341Z 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 2020-09-29T13:34:18.1808973Z 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 2020-09-29T13:34:18.1809376Z 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2020-09-29T13:34:18.1809851Z 	at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
> 2020-09-29T13:34:18.1810201Z 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2020-09-29T13:34:18.1810632Z 	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
> 2020-09-29T13:34:18.1811035Z 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
> 2020-09-29T13:34:18.1811700Z 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
> 2020-09-29T13:34:18.1812082Z 	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2020-09-29T13:34:18.1812447Z 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2020-09-29T13:34:18.1812824Z 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2020-09-29T13:34:18.1813190Z 	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2020-09-29T13:34:18.1813565Z 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2020-09-29T13:34:18.1813964Z 	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2020-09-29T13:34:18.1814364Z 	at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2020-09-29T13:34:18.1814752Z 	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2020-09-29T13:34:18.1815298Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 2020-09-29T13:34:18.1816096Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 2020-09-29T13:34:18.1816552Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 2020-09-29T13:34:18.1816984Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 2020-09-29T13:34:18.1817421Z 	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
> 2020-09-29T13:34:18.1817894Z 	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
> 2020-09-29T13:34:18.1818318Z 	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
> 2020-09-29T13:34:18.1818888Z 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> 2020-09-29T13:34:18.1819294Z 	Suppressed: org.apache.flink.util.FlinkException: Could not close resource.
> 2020-09-29T13:34:18.1819698Z 		at org.apache.flink.util.AutoCloseableAsync.close(AutoCloseableAsync.java:42)
> 2020-09-29T13:34:18.1820260Z 		at org.apache.flink.test.runtime.leaderelection.ZooKeeperLeaderElectionITCase.testJobExecutionOnClusterWithLeaderChange(ZooKeeperLeaderElectionITCase.java:136)
> 2020-09-29T13:34:18.1820678Z 		... 30 more
> 2020-09-29T13:34:18.1821326Z 	Caused by: org.apache.flink.runtime.rpc.exceptions.FencingTokenException: Fencing token not set: Ignoring message LocalFencedMessage(null, LocalRpcInvocation(deregisterApplication(ApplicationStatus, String))) sent to akka://flink/user/rpc/resourcemanager_4 because the fencing token is null.
> 2020-09-29T13:34:18.1822143Z 		at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:63)
> 2020-09-29T13:34:18.1822621Z 		at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
> 2020-09-29T13:34:18.1823024Z 		at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> 2020-09-29T13:34:18.1823397Z 		at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> 2020-09-29T13:34:18.1823776Z 		at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> 2020-09-29T13:34:18.1824306Z 		at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> 2020-09-29T13:34:18.1824686Z 		at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> 2020-09-29T13:34:18.1825066Z 		at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2020-09-29T13:34:18.1825528Z 		at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2020-09-29T13:34:18.1825883Z 		at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> 2020-09-29T13:34:18.1826238Z 		at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> 2020-09-29T13:34:18.1826579Z 		at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> 2020-09-29T13:34:18.1826912Z 		at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> 2020-09-29T13:34:18.1827302Z 		at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> 2020-09-29T13:34:18.1827609Z 		at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> 2020-09-29T13:34:18.1827912Z 		at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> 2020-09-29T13:34:18.1828252Z 		at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> 2020-09-29T13:34:18.1828629Z 		at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> 2020-09-29T13:34:18.1829062Z 		at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> 2020-09-29T13:34:18.1829468Z 		at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 2020-09-29T13:34:18.1829871Z Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit job.
> 2020-09-29T13:34:18.1830321Z 	at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$internalSubmitJob$2(Dispatcher.java:348)
> 2020-09-29T13:34:18.1830936Z 	at java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836)
> 2020-09-29T13:34:18.1831356Z 	at java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:811)
> 2020-09-29T13:34:18.1831797Z 	at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
> 2020-09-29T13:34:18.1832231Z 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 2020-09-29T13:34:18.1832645Z 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 2020-09-29T13:34:18.1833180Z 	at java.lang.Thread.run(Thread.java:748)
> 2020-09-29T13:34:18.1833761Z Caused by: java.util.ConcurrentModificationException: ZooKeeper unexpectedly modified
> 2020-09-29T13:34:18.1834413Z 	at org.apache.flink.runtime.zookeeper.ZooKeeperStateHandleStore.addAndLock(ZooKeeperStateHandleStore.java:158)
> 2020-09-29T13:34:18.1834954Z 	at org.apache.flink.runtime.jobmanager.ZooKeeperJobGraphStore.putJobGraph(ZooKeeperJobGraphStore.java:228)
> 2020-09-29T13:34:18.1835452Z 	at org.apache.flink.runtime.dispatcher.Dispatcher.persistAndRunJob(Dispatcher.java:356)
> 2020-09-29T13:34:18.1835943Z 	at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$waitForTerminatingJob$28(Dispatcher.java:827)
> 2020-09-29T13:34:18.1836432Z 	at org.apache.flink.util.function.FunctionUtils.lambda$uncheckedConsumer$3(FunctionUtils.java:94)
> 2020-09-29T13:34:18.1837422Z 	at java.util.concurrent.CompletableFuture.uniAccept(CompletableFuture.java:670)
> 2020-09-29T13:34:18.1837857Z 	at java.util.concurrent.CompletableFuture$UniAccept.tryFire(CompletableFuture.java:646)
> 2020-09-29T13:34:18.1838282Z 	at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
> 2020-09-29T13:34:18.1838730Z 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:402)
> 2020-09-29T13:34:18.1839184Z 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:195)
> 2020-09-29T13:34:18.1839647Z 	at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
> 2020-09-29T13:34:18.1840123Z 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
> 2020-09-29T13:34:18.1840531Z 	at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> 2020-09-29T13:34:18.1840895Z 	at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> 2020-09-29T13:34:18.1841272Z 	at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> 2020-09-29T13:34:18.1841659Z 	at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> 2020-09-29T13:34:18.1842029Z 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> 2020-09-29T13:34:18.1842490Z 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2020-09-29T13:34:18.1842878Z 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2020-09-29T13:34:18.1843223Z 	at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> 2020-09-29T13:34:18.1843742Z 	at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> 2020-09-29T13:34:18.1844151Z 	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> 2020-09-29T13:34:18.1844467Z 	at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> 2020-09-29T13:34:18.1844793Z 	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> 2020-09-29T13:34:18.1845114Z 	at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> 2020-09-29T13:34:18.1845399Z 	at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> 2020-09-29T13:34:18.1845735Z 	at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> 2020-09-29T13:34:18.1846125Z 	at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> 2020-09-29T13:34:18.1846508Z 	at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> 2020-09-29T13:34:18.1846905Z 	at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 2020-09-29T13:34:18.1847397Z Caused by: org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = NodeExists
> 2020-09-29T13:34:18.1847933Z 	at org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException.create(KeeperException.java:122)
> 2020-09-29T13:34:18.1848418Z 	at org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:1015)
> 2020-09-29T13:34:18.1848889Z 	at org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:919)
> 2020-09-29T13:34:18.1849421Z 	at org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:197)
> 2020-09-29T13:34:18.1850005Z 	at org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorTransactionImpl.access$000(CuratorTransactionImpl.java:37)
> 2020-09-29T13:34:18.1850598Z 	at org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:130)
> 2020-09-29T13:34:18.1851818Z 	at org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:126)
> 2020-09-29T13:34:18.1865959Z 	at org.apache.flink.shaded.curator4.org.apache.curator.connection.StandardConnectionHandlingPolicy.callWithRetry(StandardConnectionHandlingPolicy.java:64)
> 2020-09-29T13:34:18.1866608Z 	at org.apache.flink.shaded.curator4.org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:100)
> 2020-09-29T13:34:18.1867149Z 	at org.apache.flink.shaded.curator4.org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:123)
> 2020-09-29T13:34:18.1867700Z 	at org.apache.flink.runtime.zookeeper.ZooKeeperStateHandleStore.addAndLock(ZooKeeperStateHandleStore.java:152)
> 2020-09-29T13:34:18.1868034Z 	... 29 more
> 2020-09-29T13:34:18.1868164Z 
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)