You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by "Robert Metzger (Jira)" <ji...@apache.org> on 2020/05/25 05:34:00 UTC

[jira] [Reopened] (FLINK-15661) JobManagerHAProcessFailureRecoveryITCase.testDispatcherProcessFailure failed because of Could not find Flink job

     [ https://issues.apache.org/jira/browse/FLINK-15661?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Robert Metzger reopened FLINK-15661:
------------------------------------

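I observed another failure of this test, this time with an RpcConnectionException ("Could not connect to rpc endpoint") for the dispatcher rather than the originally reported FlinkJobNotFoundException: https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=2085&view=logs&j=5c8e7682-d68f-54d1-16a2-a09310218a49&t=45cc9205-bdb7-5b54-63cd-89fdc0983323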

{code}
2020-05-24T20:47:19.2825741Z [ERROR] Tests run: 2, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 31.901 s <<< FAILURE! - in org.apache.flink.test.recovery.JobManagerHAProcessFailureRecoveryITCase
2020-05-24T20:47:19.2826917Z [ERROR] testDispatcherProcessFailure[ExecutionMode PIPELINED](org.apache.flink.test.recovery.JobManagerHAProcessFailureRecoveryITCase)  Time elapsed: 15.971 s  <<< ERROR!
2020-05-24T20:47:19.2827780Z java.util.concurrent.ExecutionException: org.apache.flink.runtime.rpc.exceptions.RpcConnectionException: Could not connect to rpc endpoint under address akka.tcp://flink@127.0.0.1:45907/user/rpc/dispatcher_1.
2020-05-24T20:47:19.2828444Z 	at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
2020-05-24T20:47:19.2829276Z 	at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
2020-05-24T20:47:19.2829840Z 	at org.apache.flink.test.recovery.JobManagerHAProcessFailureRecoveryITCase.testDispatcherProcessFailure(JobManagerHAProcessFailureRecoveryITCase.java:296)
2020-05-24T20:47:19.2830366Z 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2020-05-24T20:47:19.2830750Z 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2020-05-24T20:47:19.2831190Z 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2020-05-24T20:47:19.2831592Z 	at java.lang.reflect.Method.invoke(Method.java:498)
2020-05-24T20:47:19.2832038Z 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
2020-05-24T20:47:19.2832502Z 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
2020-05-24T20:47:19.2832958Z 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
2020-05-24T20:47:19.2833405Z 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
2020-05-24T20:47:19.2833899Z 	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
2020-05-24T20:47:19.2834319Z 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
2020-05-24T20:47:19.2834693Z 	at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
2020-05-24T20:47:19.2835056Z 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
2020-05-24T20:47:19.2835402Z 	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
2020-05-24T20:47:19.2835814Z 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
2020-05-24T20:47:19.2836404Z 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
2020-05-24T20:47:19.2836824Z 	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
2020-05-24T20:47:19.2837200Z 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
2020-05-24T20:47:19.2847121Z 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
2020-05-24T20:47:19.2847541Z 	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
2020-05-24T20:47:19.2847920Z 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
2020-05-24T20:47:19.2848299Z 	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
2020-05-24T20:47:19.2848709Z 	at org.junit.runners.Suite.runChild(Suite.java:128)
2020-05-24T20:47:19.2849046Z 	at org.junit.runners.Suite.runChild(Suite.java:27)
2020-05-24T20:47:19.2849399Z 	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
2020-05-24T20:47:19.2849766Z 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
2020-05-24T20:47:19.2850156Z 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
2020-05-24T20:47:19.2850531Z 	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
2020-05-24T20:47:19.2850920Z 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
2020-05-24T20:47:19.2851334Z 	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
2020-05-24T20:47:19.2851773Z 	at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
2020-05-24T20:47:19.2852179Z 	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
2020-05-24T20:47:19.2852576Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
2020-05-24T20:47:19.2853042Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
2020-05-24T20:47:19.2853508Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
2020-05-24T20:47:19.2853966Z 	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
2020-05-24T20:47:19.2854504Z 	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
2020-05-24T20:47:19.2854991Z 	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
2020-05-24T20:47:19.2862514Z 	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
2020-05-24T20:47:19.2862941Z 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
2020-05-24T20:47:19.2863566Z Caused by: org.apache.flink.runtime.rpc.exceptions.RpcConnectionException: Could not connect to rpc endpoint under address akka.tcp://flink@127.0.0.1:45907/user/rpc/dispatcher_1.
2020-05-24T20:47:19.2864277Z 	at org.apache.flink.runtime.rpc.akka.AkkaRpcService.lambda$resolveActorAddress$10(AkkaRpcService.java:520)
2020-05-24T20:47:19.2864785Z 	at scala.concurrent.java8.FuturesConvertersImpl$CF$$anon$1.accept(FutureConvertersImpl.scala:59)
2020-05-24T20:47:19.2865291Z 	at scala.concurrent.java8.FuturesConvertersImpl$CF$$anon$1.accept(FutureConvertersImpl.scala:53)
2020-05-24T20:47:19.2865752Z 	at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
2020-05-24T20:47:19.2866379Z 	at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
2020-05-24T20:47:19.2866879Z 	at java.util.concurrent.CompletableFuture$Completion.exec(CompletableFuture.java:457)
2020-05-24T20:47:19.2867290Z 	at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
2020-05-24T20:47:19.2867700Z 	at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
2020-05-24T20:47:19.2868106Z 	at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
2020-05-24T20:47:19.2868522Z 	at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:157)
2020-05-24T20:47:19.2869149Z Caused by: akka.actor.ActorNotFound: Actor not found for: ActorSelection[Anchor(akka.tcp://flink@127.0.0.1:45907/), Path(/user/rpc/dispatcher_1)]
2020-05-24T20:47:19.2869825Z 	at akka.actor.ActorSelection$$anonfun$resolveOne$1.apply(ActorSelection.scala:71)
2020-05-24T20:47:19.2870267Z 	at akka.actor.ActorSelection$$anonfun$resolveOne$1.apply(ActorSelection.scala:69)
2020-05-24T20:47:19.2870670Z 	at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:36)
2020-05-24T20:47:19.2871205Z 	at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:55)
2020-05-24T20:47:19.2871626Z 	at akka.dispatch.BatchingExecutor$Batch.run(BatchingExecutor.scala:73)
2020-05-24T20:47:19.2872081Z 	at akka.dispatch.ExecutionContexts$sameThreadExecutionContext$.unbatchedExecute(Future.scala:81)
2020-05-24T20:47:19.2872542Z 	at akka.dispatch.BatchingExecutor$class.execute(BatchingExecutor.scala:120)
2020-05-24T20:47:19.2872975Z 	at akka.dispatch.ExecutionContexts$sameThreadExecutionContext$.execute(Future.scala:80)
2020-05-24T20:47:19.2873418Z 	at scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:44)
2020-05-24T20:47:19.2873832Z 	at scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:252)
2020-05-24T20:47:19.2874277Z 	at akka.pattern.PromiseActorRef.$bang(AskSupport.scala:572)
2020-05-24T20:47:19.2874659Z 	at akka.remote.DefaultMessageDispatcher.dispatch(Endpoint.scala:101)
2020-05-24T20:47:19.2875070Z 	at akka.remote.EndpointReader$$anonfun$receive$2.applyOrElse(Endpoint.scala:999)
2020-05-24T20:47:19.2875467Z 	at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
2020-05-24T20:47:19.2875813Z 	at akka.remote.EndpointActor.aroundReceive(Endpoint.scala:458)
2020-05-24T20:47:19.2876382Z 	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
2020-05-24T20:47:19.2876726Z 	at akka.actor.ActorCell.invoke(ActorCell.scala:561)
2020-05-24T20:47:19.2877077Z 	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
2020-05-24T20:47:19.2877418Z 	at akka.dispatch.Mailbox.run(Mailbox.scala:225)
2020-05-24T20:47:19.2877724Z 	at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
2020-05-24T20:47:19.2878080Z 	at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
2020-05-24T20:47:19.2878482Z 	at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
2020-05-24T20:47:19.2878975Z 	at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
2020-05-24T20:47:19.2881370Z 	at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
2020-05-24T20:47:19.2881875Z 
{code}

> JobManagerHAProcessFailureRecoveryITCase.testDispatcherProcessFailure failed because of Could not find Flink job 
> -----------------------------------------------------------------------------------------------------------------
>
>                 Key: FLINK-15661
>                 URL: https://issues.apache.org/jira/browse/FLINK-15661
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Coordination, Tests
>    Affects Versions: 1.11.0
>            Reporter: Congxian Qiu(klion26)
>            Priority: Critical
>              Labels: test-stability
>
> 2020-01-19T06:25:02.3856954Z [ERROR] JobManagerHAProcessFailureRecoveryITCase.testDispatcherProcessFailure:347 The program encountered a ExecutionException : org.apache.flink.runtime.rest.util.RestClientException: [org.apache.flink.runtime.rest.handler.RestHandlerException: org.apache.flink.runtime.messages.FlinkJobNotFoundException: Could not find Flink job (47fe3e8df0e59994938485f683d1410e)
>  2020-01-19T06:25:02.3857171Z at org.apache.flink.runtime.rest.handler.job.JobExecutionResultHandler.propagateException(JobExecutionResultHandler.java:91)
>  2020-01-19T06:25:02.3857571Z at org.apache.flink.runtime.rest.handler.job.JobExecutionResultHandler.lambda$handleRequest$1(JobExecutionResultHandler.java:82)
>  2020-01-19T06:25:02.3857866Z at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>  2020-01-19T06:25:02.3857982Z at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>  2020-01-19T06:25:02.3859852Z at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>  2020-01-19T06:25:02.3860440Z at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>  2020-01-19T06:25:02.3860732Z at org.apache.flink.runtime.concurrent.FutureUtils$1.onComplete(FutureUtils.java:872)
>  2020-01-19T06:25:02.3860960Z at akka.dispatch.OnComplete.internal(Future.scala:263)
>  2020-01-19T06:25:02.3861099Z at akka.dispatch.OnComplete.internal(Future.scala:261)
>  2020-01-19T06:25:02.3861232Z at akka.dispatch.japi$CallbackBridge.apply(Future.scala:191)
>  2020-01-19T06:25:02.3861391Z at akka.dispatch.japi$CallbackBridge.apply(Future.scala:188)
>  2020-01-19T06:25:02.3861546Z at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:36)
>  2020-01-19T06:25:02.3861712Z at org.apache.flink.runtime.concurrent.Executors$DirectExecutionContext.execute(Executors.java:74)
>  2020-01-19T06:25:02.3861809Z at scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:44)
>  2020-01-19T06:25:02.3861916Z at scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:252)
>  2020-01-19T06:25:02.3862221Z at akka.pattern.PromiseActorRef.$bang(AskSupport.scala:572)
>  2020-01-19T06:25:02.3862475Z at akka.pattern.PipeToSupport$PipeableFuture$$anonfun$pipeTo$1.applyOrElse(PipeToSupport.scala:23)
>  2020-01-19T06:25:02.3862626Z at akka.pattern.PipeToSupport$PipeableFuture$$anonfun$pipeTo$1.applyOrElse(PipeToSupport.scala:21)
>  2020-01-19T06:25:02.3862736Z at scala.concurrent.Future$$anonfun$andThen$1.apply(Future.scala:436)
>  2020-01-19T06:25:02.3862820Z at scala.concurrent.Future$$anonfun$andThen$1.apply(Future.scala:435)
>  2020-01-19T06:25:02.3867146Z at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:36)
>  2020-01-19T06:25:02.3867318Z at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:55)
>  2020-01-19T06:25:02.3867441Z at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply$mcV$sp(BatchingExecutor.scala:91)
>  2020-01-19T06:25:02.3867552Z at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
>  2020-01-19T06:25:02.3867664Z at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
>  2020-01-19T06:25:02.3867763Z at scala.concurrent.BlockContext$.withBlockContext(BlockContext.scala:72)
>  2020-01-19T06:25:02.3867843Z at akka.dispatch.BatchingExecutor$BlockableBatch.run(BatchingExecutor.scala:90)
>  2020-01-19T06:25:02.3867936Z at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
>  2020-01-19T06:25:02.3868036Z at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(ForkJoinExecutorConfigurator.scala:44)
>  2020-01-19T06:25:02.3868145Z at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>  2020-01-19T06:25:02.3868223Z at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>  2020-01-19T06:25:02.3868313Z at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>  2020-01-19T06:25:02.3868390Z at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
>  2020-01-19T06:25:02.3868520Z Caused by: java.util.concurrent.CompletionException: org.apache.flink.runtime.messages.FlinkJobNotFoundException: Could not find Flink job (47fe3e8df0e59994938485f683d1410e)
>  2020-01-19T06:25:02.3868625Z at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$requestJobStatus$17(Dispatcher.java:516)
>  2020-01-19T06:25:02.3868734Z at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>  2020-01-19T06:25:02.3868831Z at java.util.concurrent.CompletableFuture.uniExceptionallyStage(CompletableFuture.java:884)
>  2020-01-19T06:25:02.3869143Z at java.util.concurrent.CompletableFuture.exceptionally(CompletableFuture.java:2196)
>  2020-01-19T06:25:02.3869241Z at org.apache.flink.runtime.dispatcher.Dispatcher.requestJobStatus(Dispatcher.java:510)
>  2020-01-19T06:25:02.3869319Z at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  2020-01-19T06:25:02.3869418Z at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>  2020-01-19T06:25:02.3869506Z at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  2020-01-19T06:25:02.3869602Z at java.lang.reflect.Method.invoke(Method.java:498)
>  2020-01-19T06:25:02.3869681Z at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:279)
>  2020-01-19T06:25:02.3869780Z at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:194)
>  2020-01-19T06:25:02.3869865Z at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:74)
>  2020-01-19T06:25:02.3869982Z at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:152)
>  2020-01-19T06:25:02.3870062Z at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
>  2020-01-19T06:25:02.3870153Z at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
>  2020-01-19T06:25:02.3870228Z at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
>  2020-01-19T06:25:02.3870399Z at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
>  2020-01-19T06:25:02.3870481Z at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
>  2020-01-19T06:25:02.3870571Z at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
>  2020-01-19T06:25:02.3870646Z at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
>  2020-01-19T06:25:02.3870733Z at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
>  2020-01-19T06:25:02.3870911Z at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
>  2020-01-19T06:25:02.3871013Z at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
>  2020-01-19T06:25:02.3871086Z at akka.actor.ActorCell.invoke(ActorCell.scala:561)
>  2020-01-19T06:25:02.3871170Z at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
>  2020-01-19T06:25:02.3871350Z at akka.dispatch.Mailbox.run(Mailbox.scala:225)
>  2020-01-19T06:25:02.3871439Z at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
>  2020-01-19T06:25:02.3871509Z ... 4 more
>  2020-01-19T06:25:02.3871618Z Caused by: org.apache.flink.runtime.messages.FlinkJobNotFoundException: Could not find Flink job (47fe3e8df0e59994938485f683d1410e)
>  2020-01-19T06:25:02.3871721Z at org.apache.flink.runtime.dispatcher.Dispatcher.getJobMasterGatewayFuture(Dispatcher.java:776)
>  2020-01-19T06:25:02.3871827Z at org.apache.flink.runtime.dispatcher.Dispatcher.requestJobStatus(Dispatcher.java:505)
>  2020-01-19T06:25:02.3871903Z ... 26 more
>  2020-01-19T06:25:02.3871975Z ]
>  
> [https://dev.azure.com/rmetzger/5bd3ef0a-4359-41af-abca-811b04098d2e/_apis/build/builds/4461/logs/15]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)