You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by "Flink Jira Bot (Jira)" <ji...@apache.org> on 2022/04/05 22:39:00 UTC

[jira] [Updated] (FLINK-25814) AdaptiveSchedulerITCase.testStopWithSavepointFailOnFirstSavepointSucceedOnSecond failed due to stop-with-savepoint failed

     [ https://issues.apache.org/jira/browse/FLINK-25814?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Flink Jira Bot updated FLINK-25814:
-----------------------------------
      Labels: auto-deprioritized-major test-stability  (was: stale-major test-stability)
    Priority: Minor  (was: Major)

This issue was labeled "stale-major" 7 days ago and has not received any updates, so it is being deprioritized. If this ticket is actually Major, please raise the priority and either ask a committer to assign you the issue or revive the public discussion.


> AdaptiveSchedulerITCase.testStopWithSavepointFailOnFirstSavepointSucceedOnSecond failed due to stop-with-savepoint failed
> -------------------------------------------------------------------------------------------------------------------------
>
>                 Key: FLINK-25814
>                 URL: https://issues.apache.org/jira/browse/FLINK-25814
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Checkpointing
>    Affects Versions: 1.13.5
>            Reporter: Yun Gao
>            Priority: Minor
>              Labels: auto-deprioritized-major, test-stability
>
> {code:java}
> 2022-01-25T05:37:28.6339368Z Jan 25 05:37:28 [ERROR] testStopWithSavepointFailOnFirstSavepointSucceedOnSecond(org.apache.flink.test.scheduling.AdaptiveSchedulerITCase)  Time elapsed: 300.269 s  <<< ERROR!
> 2022-01-25T05:37:28.6340216Z Jan 25 05:37:28 java.util.concurrent.ExecutionException: org.apache.flink.util.FlinkException: Stop with savepoint operation could not be completed.
> 2022-01-25T05:37:28.6342330Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> 2022-01-25T05:37:28.6343776Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
> 2022-01-25T05:37:28.6344983Z Jan 25 05:37:28 	at org.apache.flink.test.scheduling.AdaptiveSchedulerITCase.testStopWithSavepointFailOnFirstSavepointSucceedOnSecond(AdaptiveSchedulerITCase.java:231)
> 2022-01-25T05:37:28.6346165Z Jan 25 05:37:28 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 2022-01-25T05:37:28.6347145Z Jan 25 05:37:28 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 2022-01-25T05:37:28.6348207Z Jan 25 05:37:28 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 2022-01-25T05:37:28.6349147Z Jan 25 05:37:28 	at java.lang.reflect.Method.invoke(Method.java:498)
> 2022-01-25T05:37:28.6350068Z Jan 25 05:37:28 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 2022-01-25T05:37:28.6351116Z Jan 25 05:37:28 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 2022-01-25T05:37:28.6352132Z Jan 25 05:37:28 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 2022-01-25T05:37:28.6353816Z Jan 25 05:37:28 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 2022-01-25T05:37:28.6354863Z Jan 25 05:37:28 	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
> 2022-01-25T05:37:28.6355983Z Jan 25 05:37:28 	at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
> 2022-01-25T05:37:28.6356958Z Jan 25 05:37:28 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2022-01-25T05:37:28.6357871Z Jan 25 05:37:28 	at org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45)
> 2022-01-25T05:37:28.6358799Z Jan 25 05:37:28 	at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
> 2022-01-25T05:37:28.6359658Z Jan 25 05:37:28 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2022-01-25T05:37:28.6360506Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
> 2022-01-25T05:37:28.6361425Z Jan 25 05:37:28 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
> 2022-01-25T05:37:28.6362486Z Jan 25 05:37:28 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
> 2022-01-25T05:37:28.6364531Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
> 2022-01-25T05:37:28.6365709Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
> 2022-01-25T05:37:28.6366600Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
> 2022-01-25T05:37:28.6367488Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
> 2022-01-25T05:37:28.6368333Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
> 2022-01-25T05:37:28.6369236Z Jan 25 05:37:28 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
> 2022-01-25T05:37:28.6370133Z Jan 25 05:37:28 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 2022-01-25T05:37:28.6371056Z Jan 25 05:37:28 	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
> 2022-01-25T05:37:28.6371957Z Jan 25 05:37:28 	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 2022-01-25T05:37:28.6373128Z Jan 25 05:37:28 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 2022-01-25T05:37:28.6374293Z Jan 25 05:37:28 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 2022-01-25T05:37:28.6375273Z Jan 25 05:37:28 	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 2022-01-25T05:37:28.6376370Z Jan 25 05:37:28 	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
> 2022-01-25T05:37:28.6377421Z Jan 25 05:37:28 	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
> 2022-01-25T05:37:28.6378437Z Jan 25 05:37:28 	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
> 2022-01-25T05:37:28.6379366Z Jan 25 05:37:28 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> 2022-01-25T05:37:28.6380382Z Jan 25 05:37:28 Caused by: org.apache.flink.util.FlinkException: Stop with savepoint operation could not be completed.
> 2022-01-25T05:37:28.6381481Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.StopWithSavepoint.onLeave(StopWithSavepoint.java:118)
> 2022-01-25T05:37:28.6382614Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.transitionToState(AdaptiveScheduler.java:1138)
> 2022-01-25T05:37:28.6384051Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.goToExecuting(AdaptiveScheduler.java:787)
> 2022-01-25T05:37:28.6385172Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.StopWithSavepoint.handleSavepointCompletion(StopWithSavepoint.java:106)
> 2022-01-25T05:37:28.6386431Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.StopWithSavepoint.lambda$null$0(StopWithSavepoint.java:89)
> 2022-01-25T05:37:28.6387766Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.runIfState(AdaptiveScheduler.java:1093)
> 2022-01-25T05:37:28.6388968Z Jan 25 05:37:28 	at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.lambda$runIfState$26(AdaptiveScheduler.java:1108)
> 2022-01-25T05:37:28.6390068Z Jan 25 05:37:28 	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> 2022-01-25T05:37:28.6390943Z Jan 25 05:37:28 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> 2022-01-25T05:37:28.6391861Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:440)
> 2022-01-25T05:37:28.6392992Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:208)
> 2022-01-25T05:37:28.6394176Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
> 2022-01-25T05:37:28.6395306Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
> 2022-01-25T05:37:28.6396300Z Jan 25 05:37:28 	at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> 2022-01-25T05:37:28.6397093Z Jan 25 05:37:28 	at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> 2022-01-25T05:37:28.6398103Z Jan 25 05:37:28 	at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> 2022-01-25T05:37:28.6398937Z Jan 25 05:37:28 	at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> 2022-01-25T05:37:28.6399816Z Jan 25 05:37:28 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> 2022-01-25T05:37:28.6400620Z Jan 25 05:37:28 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2022-01-25T05:37:28.6401201Z Jan 25 05:37:28 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2022-01-25T05:37:28.6401732Z Jan 25 05:37:28 	at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> 2022-01-25T05:37:28.6402283Z Jan 25 05:37:28 	at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> 2022-01-25T05:37:28.6403036Z Jan 25 05:37:28 	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> 2022-01-25T05:37:28.6403655Z Jan 25 05:37:28 	at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> 2022-01-25T05:37:28.6404279Z Jan 25 05:37:28 	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> 2022-01-25T05:37:28.6404784Z Jan 25 05:37:28 	at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> 2022-01-25T05:37:28.6405247Z Jan 25 05:37:28 	at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> 2022-01-25T05:37:28.6405759Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> 2022-01-25T05:37:28.6406333Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> 2022-01-25T05:37:28.6406913Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> 2022-01-25T05:37:28.6407489Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 2022-01-25T05:37:28.6409430Z Jan 25 05:37:28 Caused by: java.util.concurrent.CompletionException: org.apache.flink.runtime.checkpoint.CheckpointException: Checkpoint triggering task Source: Custom Source -> Sink: Unnamed (3/4) of job 82cd873a17154bb7a4a7c858b0499b17 is not being executed at the moment. Aborting checkpoint. Failure reason: Not all required tasks are currently running.
> 2022-01-25T05:37:28.6410546Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
> 2022-01-25T05:37:28.6411174Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
> 2022-01-25T05:37:28.6411795Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:607)
> 2022-01-25T05:37:28.6412568Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
> 2022-01-25T05:37:28.6413668Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> 2022-01-25T05:37:28.6414801Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> 2022-01-25T05:37:28.6415863Z Jan 25 05:37:28 	at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.lambda$null$0(CheckpointCoordinator.java:483)
> 2022-01-25T05:37:28.6416867Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> 2022-01-25T05:37:28.6417815Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> 2022-01-25T05:37:28.6418778Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> 2022-01-25T05:37:28.6419831Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> 2022-01-25T05:37:28.6421211Z Jan 25 05:37:28 	at org.apache.flink.runtime.checkpoint.CheckpointCoordinator$CheckpointTriggerRequest.completeExceptionally(CheckpointCoordinator.java:2043)
> 2022-01-25T05:37:28.6422543Z Jan 25 05:37:28 	at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.onTriggerFailure(CheckpointCoordinator.java:854)
> 2022-01-25T05:37:28.6424024Z Jan 25 05:37:28 	at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.lambda$startTriggeringCheckpoint$7(CheckpointCoordinator.java:609)
> 2022-01-25T05:37:28.6425302Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:836)
> 2022-01-25T05:37:28.6426267Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:811)
> 2022-01-25T05:37:28.6427287Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
> 2022-01-25T05:37:28.6428251Z Jan 25 05:37:28 	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> 2022-01-25T05:37:28.6429096Z Jan 25 05:37:28 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> 2022-01-25T05:37:28.6430029Z Jan 25 05:37:28 	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
> 2022-01-25T05:37:28.6431223Z Jan 25 05:37:28 	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
> 2022-01-25T05:37:28.6432373Z Jan 25 05:37:28 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 2022-01-25T05:37:28.6433631Z Jan 25 05:37:28 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 2022-01-25T05:37:28.6434446Z Jan 25 05:37:28 	at java.lang.Thread.run(Thread.java:748)
> 2022-01-25T05:37:28.6436909Z Jan 25 05:37:28 Caused by: org.apache.flink.runtime.checkpoint.CheckpointException: Checkpoint triggering task Source: Custom Source -> Sink: Unnamed (3/4) of job 82cd873a17154bb7a4a7c858b0499b17 is not being executed at the moment. Aborting checkpoint. Failure reason: Not all required tasks are currently running.
> 2022-01-25T05:37:28.6438679Z Jan 25 05:37:28 	at org.apache.flink.runtime.checkpoint.DefaultCheckpointPlanCalculator.checkTasksStarted(DefaultCheckpointPlanCalculator.java:152)
> 2022-01-25T05:37:28.6440030Z Jan 25 05:37:28 	at org.apache.flink.runtime.checkpoint.DefaultCheckpointPlanCalculator.lambda$calculateCheckpointPlan$1(DefaultCheckpointPlanCalculator.java:114)
> 2022-01-25T05:37:28.6441193Z Jan 25 05:37:28 	at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
> 2022-01-25T05:37:28.6442233Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:440)
> 2022-01-25T05:37:28.6443743Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:208)
> 2022-01-25T05:37:28.6444897Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
> 2022-01-25T05:37:28.6446141Z Jan 25 05:37:28 	at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
> 2022-01-25T05:37:28.6447061Z Jan 25 05:37:28 	at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
> 2022-01-25T05:37:28.6448340Z Jan 25 05:37:28 	at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
> 2022-01-25T05:37:28.6449288Z Jan 25 05:37:28 	at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
> 2022-01-25T05:37:28.6450239Z Jan 25 05:37:28 	at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
> 2022-01-25T05:37:28.6451135Z Jan 25 05:37:28 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
> 2022-01-25T05:37:28.6451992Z Jan 25 05:37:28 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2022-01-25T05:37:28.6453049Z Jan 25 05:37:28 	at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
> 2022-01-25T05:37:28.6454035Z Jan 25 05:37:28 	at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
> 2022-01-25T05:37:28.6454898Z Jan 25 05:37:28 	at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
> 2022-01-25T05:37:28.6455753Z Jan 25 05:37:28 	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> 2022-01-25T05:37:28.6456558Z Jan 25 05:37:28 	at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> 2022-01-25T05:37:28.6457393Z Jan 25 05:37:28 	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> 2022-01-25T05:37:28.6458494Z Jan 25 05:37:28 	at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> 2022-01-25T05:37:28.6459253Z Jan 25 05:37:28 	at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> 2022-01-25T05:37:28.6460108Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> 2022-01-25T05:37:28.6461057Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> 2022-01-25T05:37:28.6461985Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> 2022-01-25T05:37:28.6463100Z Jan 25 05:37:28 	at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)