You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user-zh@flink.apache.org by Zhou Zach <wa...@163.com> on 2020/06/19 06:57:05 UTC

flink run from checkpoint failed




用yarn application kill flink job后,
执行/opt/flink-1.10.0/bin/flink run -s /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond  /data/warehouse/streaming/data-flow-1.0.jar








2020-06-19 14:39:54,563 INFO  org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager  - State change: CONNECTED
2020-06-19 14:39:54,664 INFO  org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  - Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
2020-06-19 14:40:24,728 INFO  org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  - Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
2020-06-19 14:40:24,729 INFO  org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl  - backgroundOperationsLoop exiting
2020-06-19 14:40:24,733 INFO  org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  - Session: 0x272b776faca2414 closed
2020-06-19 14:40:24,733 INFO  org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  - EventThread shut down for session: 0x272b776faca2414
2020-06-19 14:40:24,734 ERROR org.apache.flink.client.cli.CliFrontend                       - Error while running the command.
org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
        at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
        at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
        at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
        at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
        at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
        at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
        at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
        at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
        at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
Caused by: java.lang.RuntimeException: java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
        at org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
        at org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
        at org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
        at org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
        at org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
        at cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
        at cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
        ... 11 more
Caused by: java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
        at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
        at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
        ... 23 more
Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
        at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
        at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
        at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
        at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
        at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
        at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
        at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
        at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
        at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
        at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
        at org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
        at org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
        at org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.util.concurrent.TimeoutException


Re:Re: Re:Re:Re: Re: Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.
https://issues.apache.org/jira/browse/FLINK-10636
看到这个 issue 说这个问题是 Kafka 0.8 的问题。我现在用的 Kafka 是 2.2.1+cdh6.3.2,这个 Kafka 版本也有这个问题吗?

















在 2020-06-22 15:16:14,"Congxian Qiu" <qc...@gmail.com> 写道:
>1 首先,-s 后面跟的参数可以是 savepoint 也可以是 checkpoint path,从 retain checkpoint
>恢复就是这么启动的[1]
>2 从你的发的日志看,里面有一些认证相关的问题 `2020-06-22 13:00:59,368 ERROR
>org.apache.flink.shaded.curator.org.apache.curator.ConnectionState  -
>Authentication failed` 或许你可以先尝试解决下这个问题看看。
>
>[1]
>https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/state/checkpoints.html#resuming-from-a-retained-checkpoint
>Best,
>Congxian
>
>
>Zhou Zach <wa...@163.com> 于2020年6月22日周一 下午3:03写道:
>
>> flink run -s 后面跟的参数是不是只能是savepointPath,不能是flnk job 自动checkpoint path吗
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> 在 2020-06-22 14:32:02,"Zhou Zach" <wa...@163.com> 写道:
>> >重启了CDH6集群,还是报同样的错误,flink 故障恢复不成功,不敢上生产啊,哪位大佬帮忙看下啊
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >在 2020-06-22 13:21:01,"Zhou Zach" <wa...@163.com> 写道:
>> >
>> >用yarn application kill flink job把yarn的application杀掉后,
>> >执行/opt/flink-1.10.0/bin/flink run -s
>> hdfs://nameservice1:8020/user/flink10/checkpoints/f1b6f5392cd5053db155e709ffe9f871/chk-15/_metadata
>> dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>> /data/warehouse/streaming/data-flow-1.0.jar,启动不起来,/opt/flink-1.10.0/log日志上传到附件了。。。
>> >
>> >
>> >执行/opt/flink-1.10.0/bin/flink run -c
>> dataflow.sql.FromKafkaSinkJdbcForCountPerSecond -m yarn-cluster -yjm 1024m
>> -ytm 8192m -p 2 -ys 4 -ynm UV -d data-flow-1.0.jar,是可以正常启动的,就是带上-s参数报错。。。
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >在 2020-06-21 09:16:45,"Congxian Qiu" <qc...@gmail.com> 写道:
>> >>Hi
>> >>
>> >>这个作业的 application 有起来吗?起来了的话,可以看看 JM
>> >>log,如果没有起来,可以从提交客户端的那看看有没有更详细的提交日志。日志目录默认在 `/opt/flink-1.10.0/log` 下面
>> >>
>> >>Best,
>> >>Congxian
>> >>
>> >>
>> >>Zhou Zach <wa...@163.com> 于2020年6月19日周五 下午8:15写道:
>> >>
>> >>> 我是per job模式,不是yarn session模式啊
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>>
>> >>> At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
>> >>> >那得重启yarn session,再把作业提交上去
>> >>> >
>> >>> >On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
>> >>> >
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >> 用yarn application kill flink
>> job把yarn的application杀掉了,杀掉后yarn没有重启flink
>> >>> job
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >>
>> >>> >> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
>> >>> >> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
>> >>> >> >
>> >>> >> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com>
>> wrote:
>> >>> >> >
>> >>> >> >>
>> >>> >> >>
>> >>> >> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
>> >>> >> >> akka.client.timeout: 600000000
>> >>> >> >> akka.ask.timeout: 6000000
>> >>> >> >>
>> >>> >> >> 有大佬知道是什么原因吗
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >>
>> >>> >> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >用yarn application kill flink job后,
>> >>> >> >> >执行/opt/flink-1.10.0/bin/flink run -s
>> >>> >> >>
>> >>> >>
>> >>>
>> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
>> >>> >> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>> >>> >> >> /data/warehouse/streaming/data-flow-1.0.jar
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >
>> >>> >> >> >2020-06-19 14:39:54,563 INFO
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
>> >>> >> >> - State change: CONNECTED
>> >>> >> >> >2020-06-19 14:39:54,664 INFO
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >>> >> >> Starting ZooKeeperLeaderRetrievalService
>> /leader/rest_server_lock.
>> >>> >> >> >2020-06-19 14:40:24,728 INFO
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >>> >> >> Stopping ZooKeeperLeaderRetrievalService
>> /leader/rest_server_lock.
>> >>> >> >> >2020-06-19 14:40:24,729 INFO
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
>> >>> >> >> - backgroundOperationsLoop exiting
>> >>> >> >> >2020-06-19 14:40:24,733 INFO
>> >>> >> >>
>> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
>> >>> >> >> Session: 0x272b776faca2414 closed
>> >>> >> >> >2020-06-19 14:40:24,733 INFO
>> >>> >> >>
>> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
>> >>> >> >> EventThread shut down for session: 0x272b776faca2414
>> >>> >> >> >2020-06-19 14:40:24,734 ERROR
>> >>> org.apache.flink.client.cli.CliFrontend
>> >>> >> >>                    - Error while running the command.
>> >>> >> >> >org.apache.flink.client.program.ProgramInvocationException: The
>> main
>> >>> >> >> method caused an error: java.util.concurrent.ExecutionException:
>> >>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> >>> submit
>> >>> >> >> JobGraph.
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>> >>> >> >> >        at
>> >>> >> >>
>> >>>
>> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>> >>> >> >> >        at
>> >>> >> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>> >>> >> >> >        at java.security.AccessController.doPrivileged(Native
>> >>> Method)
>> >>> >> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>> >>> >> >> >        at
>> >>> >> >>
>> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>> >>> >> >> >Caused by: java.lang.RuntimeException:
>> >>> >> >> java.util.concurrent.ExecutionException:
>> >>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> >>> submit
>> >>> >> >> JobGraph.
>> >>> >> >> >        at
>> >>> >> >>
>> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>> >>> >> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>> >>> Method)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> >>> >> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>> >>> >> >> >        ... 11 more
>> >>> >> >> >Caused by: java.util.concurrent.ExecutionException:
>> >>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> >>> submit
>> >>> >> >> JobGraph.
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>> >>> >> >> >        ... 23 more
>> >>> >> >> >Caused by:
>> org.apache.flink.runtime.client.JobSubmissionException:
>> >>> >> Failed
>> >>> >> >> to submit JobGraph.
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>> >>> >> >> >        at
>> java.util.concurrent.FutureTask.run(FutureTask.java:266)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>> >>> >> >> >        at
>> >>> >> >>
>> >>> >>
>> >>>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>> >>> >> >> >        at java.lang.Thread.run(Thread.java:748)
>> >>> >> >> >Caused by: java.util.concurrent.TimeoutException
>> >>> >> >> >
>> >>> >> >>
>> >>> >> >
>> >>> >> >
>> >>> >> >--
>> >>> >> >Best regards!
>> >>> >> >Rui Li
>> >>> >>
>> >>> >
>> >>> >
>> >>> >--
>> >>> >Best regards!
>> >>> >Rui Li
>> >>>
>> >
>> >
>> >
>> >
>> >
>> >
>>

Re: Re:Re:Re: Re: Re: flink run from checkpoint failed

Posted by Congxian Qiu <qc...@gmail.com>.
1 首先,-s 后面跟的参数可以是 savepoint 也可以是 checkpoint path,从 retain checkpoint
恢复就是这么启动的[1]
2 从你的发的日志看,里面有一些认证相关的问题 `2020-06-22 13:00:59,368 ERROR
org.apache.flink.shaded.curator.org.apache.curator.ConnectionState  -
Authentication failed` 或许你可以先尝试解决下这个问题看看。

[1]
https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/state/checkpoints.html#resuming-from-a-retained-checkpoint
Best,
Congxian


Zhou Zach <wa...@163.com> 于2020年6月22日周一 下午3:03写道:

> flink run -s 后面跟的参数是不是只能是savepointPath,不能是flnk job 自动checkpoint path吗
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
> 在 2020-06-22 14:32:02,"Zhou Zach" <wa...@163.com> 写道:
> >重启了CDH6集群,还是报同样的错误,flink 故障恢复不成功,不敢上生产啊,哪位大佬帮忙看下啊
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >在 2020-06-22 13:21:01,"Zhou Zach" <wa...@163.com> 写道:
> >
> >用yarn application kill flink job把yarn的application杀掉后,
> >执行/opt/flink-1.10.0/bin/flink run -s
> hdfs://nameservice1:8020/user/flink10/checkpoints/f1b6f5392cd5053db155e709ffe9f871/chk-15/_metadata
> dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
> /data/warehouse/streaming/data-flow-1.0.jar,启动不起来,/opt/flink-1.10.0/log日志上传到附件了。。。
> >
> >
> >执行/opt/flink-1.10.0/bin/flink run -c
> dataflow.sql.FromKafkaSinkJdbcForCountPerSecond -m yarn-cluster -yjm 1024m
> -ytm 8192m -p 2 -ys 4 -ynm UV -d data-flow-1.0.jar,是可以正常启动的,就是带上-s参数报错。。。
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >在 2020-06-21 09:16:45,"Congxian Qiu" <qc...@gmail.com> 写道:
> >>Hi
> >>
> >>这个作业的 application 有起来吗?起来了的话,可以看看 JM
> >>log,如果没有起来,可以从提交客户端的那看看有没有更详细的提交日志。日志目录默认在 `/opt/flink-1.10.0/log` 下面
> >>
> >>Best,
> >>Congxian
> >>
> >>
> >>Zhou Zach <wa...@163.com> 于2020年6月19日周五 下午8:15写道:
> >>
> >>> 我是per job模式,不是yarn session模式啊
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>> At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
> >>> >那得重启yarn session,再把作业提交上去
> >>> >
> >>> >On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
> >>> >
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >> 用yarn application kill flink
> job把yarn的application杀掉了,杀掉后yarn没有重启flink
> >>> job
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >>
> >>> >> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
> >>> >> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
> >>> >> >
> >>> >> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com>
> wrote:
> >>> >> >
> >>> >> >>
> >>> >> >>
> >>> >> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
> >>> >> >> akka.client.timeout: 600000000
> >>> >> >> akka.ask.timeout: 6000000
> >>> >> >>
> >>> >> >> 有大佬知道是什么原因吗
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >>
> >>> >> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >用yarn application kill flink job后,
> >>> >> >> >执行/opt/flink-1.10.0/bin/flink run -s
> >>> >> >>
> >>> >>
> >>>
> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
> >>> >> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
> >>> >> >> /data/warehouse/streaming/data-flow-1.0.jar
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >
> >>> >> >> >2020-06-19 14:39:54,563 INFO
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
> >>> >> >> - State change: CONNECTED
> >>> >> >> >2020-06-19 14:39:54,664 INFO
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> >>> >> >> Starting ZooKeeperLeaderRetrievalService
> /leader/rest_server_lock.
> >>> >> >> >2020-06-19 14:40:24,728 INFO
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> >>> >> >> Stopping ZooKeeperLeaderRetrievalService
> /leader/rest_server_lock.
> >>> >> >> >2020-06-19 14:40:24,729 INFO
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
> >>> >> >> - backgroundOperationsLoop exiting
> >>> >> >> >2020-06-19 14:40:24,733 INFO
> >>> >> >>
> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
> >>> >> >> Session: 0x272b776faca2414 closed
> >>> >> >> >2020-06-19 14:40:24,733 INFO
> >>> >> >>
> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
> >>> >> >> EventThread shut down for session: 0x272b776faca2414
> >>> >> >> >2020-06-19 14:40:24,734 ERROR
> >>> org.apache.flink.client.cli.CliFrontend
> >>> >> >>                    - Error while running the command.
> >>> >> >> >org.apache.flink.client.program.ProgramInvocationException: The
> main
> >>> >> >> method caused an error: java.util.concurrent.ExecutionException:
> >>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
> >>> submit
> >>> >> >> JobGraph.
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
> >>> >> >> >        at
> >>> >> >>
> >>>
> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
> >>> >> >> >        at
> >>> >> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
> >>> >> >> >        at java.security.AccessController.doPrivileged(Native
> >>> Method)
> >>> >> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
> >>> >> >> >        at
> >>> >> >>
> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
> >>> >> >> >Caused by: java.lang.RuntimeException:
> >>> >> >> java.util.concurrent.ExecutionException:
> >>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
> >>> submit
> >>> >> >> JobGraph.
> >>> >> >> >        at
> >>> >> >>
> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
> >>> >> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>> Method)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >>> >> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
> >>> >> >> >        ... 11 more
> >>> >> >> >Caused by: java.util.concurrent.ExecutionException:
> >>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
> >>> submit
> >>> >> >> JobGraph.
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> >>> >> >> >        at
> >>> >> >>
> >>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
> >>> >> >> >        ... 23 more
> >>> >> >> >Caused by:
> org.apache.flink.runtime.client.JobSubmissionException:
> >>> >> Failed
> >>> >> >> to submit JobGraph.
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
> >>> >> >> >        at
> >>> >> >>
> >>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> >>> >> >> >        at
> java.util.concurrent.FutureTask.run(FutureTask.java:266)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> >>> >> >> >        at
> >>> >> >>
> >>> >>
> >>>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> >>> >> >> >        at java.lang.Thread.run(Thread.java:748)
> >>> >> >> >Caused by: java.util.concurrent.TimeoutException
> >>> >> >> >
> >>> >> >>
> >>> >> >
> >>> >> >
> >>> >> >--
> >>> >> >Best regards!
> >>> >> >Rui Li
> >>> >>
> >>> >
> >>> >
> >>> >--
> >>> >Best regards!
> >>> >Rui Li
> >>>
> >
> >
> >
> >
> >
> >
>

Re:Re:Re:Re: Re: Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.
flink run -s 后面跟的参数是不是只能是savepointPath,不能是flink job自动生成的checkpoint path?















在 2020-06-22 14:32:02,"Zhou Zach" <wa...@163.com> 写道:
>重启了CDH6集群,还是报同样的错误,flink 故障恢复不成功,不敢上生产啊,哪位大佬帮忙看下啊
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>在 2020-06-22 13:21:01,"Zhou Zach" <wa...@163.com> 写道:
>
>用yarn application kill flink job把yarn的application杀掉后,
>执行/opt/flink-1.10.0/bin/flink run -s hdfs://nameservice1:8020/user/flink10/checkpoints/f1b6f5392cd5053db155e709ffe9f871/chk-15/_metadata  dataflow.sql.FromKafkaSinkJdbcForCountPerSecond /data/warehouse/streaming/data-flow-1.0.jar,启动不起来,/opt/flink-1.10.0/log日志上传到附件了。。。
>
>
>执行/opt/flink-1.10.0/bin/flink run -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond -m yarn-cluster -yjm 1024m -ytm 8192m -p 2 -ys 4 -ynm UV -d data-flow-1.0.jar,是可以正常启动的,就是带上-s参数报错。。。
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>在 2020-06-21 09:16:45,"Congxian Qiu" <qc...@gmail.com> 写道:
>>Hi
>>
>>这个作业的 application 有起来吗?起来了的话,可以看看 JM
>>log,如果没有起来,可以从提交客户端的那看看有没有更详细的提交日志。日志目录默认在 `/opt/flink-1.10.0/log` 下面
>>
>>Best,
>>Congxian
>>
>>
>>Zhou Zach <wa...@163.com> 于2020年6月19日周五 下午8:15写道:
>>
>>> 我是per job模式,不是yarn session模式啊
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
>>> >那得重启yarn session,再把作业提交上去
>>> >
>>> >On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
>>> >
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >> 用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink
>>> job
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >>
>>> >> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
>>> >> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
>>> >> >
>>> >> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
>>> >> >
>>> >> >>
>>> >> >>
>>> >> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
>>> >> >> akka.client.timeout: 600000000
>>> >> >> akka.ask.timeout: 6000000
>>> >> >>
>>> >> >> 有大佬知道是什么原因吗
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >>
>>> >> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >用yarn application kill flink job后,
>>> >> >> >执行/opt/flink-1.10.0/bin/flink run -s
>>> >> >>
>>> >>
>>> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
>>> >> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>>> >> >> /data/warehouse/streaming/data-flow-1.0.jar
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >
>>> >> >> >2020-06-19 14:39:54,563 INFO
>>> >> >>
>>> >>
>>> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
>>> >> >> - State change: CONNECTED
>>> >> >> >2020-06-19 14:39:54,664 INFO
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>>> >> >> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>>> >> >> >2020-06-19 14:40:24,728 INFO
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>>> >> >> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>>> >> >> >2020-06-19 14:40:24,729 INFO
>>> >> >>
>>> >>
>>> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
>>> >> >> - backgroundOperationsLoop exiting
>>> >> >> >2020-06-19 14:40:24,733 INFO
>>> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
>>> >> >> Session: 0x272b776faca2414 closed
>>> >> >> >2020-06-19 14:40:24,733 INFO
>>> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
>>> >> >> EventThread shut down for session: 0x272b776faca2414
>>> >> >> >2020-06-19 14:40:24,734 ERROR
>>> org.apache.flink.client.cli.CliFrontend
>>> >> >>                    - Error while running the command.
>>> >> >> >org.apache.flink.client.program.ProgramInvocationException: The main
>>> >> >> method caused an error: java.util.concurrent.ExecutionException:
>>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>>> submit
>>> >> >> JobGraph.
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>>> >> >> >        at
>>> >> >>
>>> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>>> >> >> >        at
>>> >> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>>> >> >> >        at java.security.AccessController.doPrivileged(Native
>>> Method)
>>> >> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>>> >> >> >        at
>>> >> >> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>>> >> >> >Caused by: java.lang.RuntimeException:
>>> >> >> java.util.concurrent.ExecutionException:
>>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>>> submit
>>> >> >> JobGraph.
>>> >> >> >        at
>>> >> >> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>>> >> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>>> Method)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>> >> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>>> >> >> >        ... 11 more
>>> >> >> >Caused by: java.util.concurrent.ExecutionException:
>>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>>> submit
>>> >> >> JobGraph.
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>>> >> >> >        at
>>> >> >>
>>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>>> >> >> >        ... 23 more
>>> >> >> >Caused by: org.apache.flink.runtime.client.JobSubmissionException:
>>> >> Failed
>>> >> >> to submit JobGraph.
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>>> >> >> >        at
>>> >> >>
>>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>>> >> >> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>>> >> >> >        at
>>> >> >>
>>> >>
>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>>> >> >> >        at java.lang.Thread.run(Thread.java:748)
>>> >> >> >Caused by: java.util.concurrent.TimeoutException
>>> >> >> >
>>> >> >>
>>> >> >
>>> >> >
>>> >> >--
>>> >> >Best regards!
>>> >> >Rui Li
>>> >>
>>> >
>>> >
>>> >--
>>> >Best regards!
>>> >Rui Li
>>>
>
>
>
>
>
> 

Re:Re:Re: Re: Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.
重启了CDH6集群,还是报同样的错误,flink 故障恢复不成功,不敢上生产啊,哪位大佬帮忙看下啊
















在 2020-06-22 13:21:01,"Zhou Zach" <wa...@163.com> 写道:

用yarn application kill flink job把yarn的application杀掉后,
执行/opt/flink-1.10.0/bin/flink run -s hdfs://nameservice1:8020/user/flink10/checkpoints/f1b6f5392cd5053db155e709ffe9f871/chk-15/_metadata  dataflow.sql.FromKafkaSinkJdbcForCountPerSecond /data/warehouse/streaming/data-flow-1.0.jar,启动不起来,/opt/flink-1.10.0/log日志上传到附件了。。。


执行/opt/flink-1.10.0/bin/flink run -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond -m yarn-cluster -yjm 1024m -ytm 8192m -p 2 -ys 4 -ynm UV -d data-flow-1.0.jar,是可以正常启动的,就是带上-s参数报错。。。



















在 2020-06-21 09:16:45,"Congxian Qiu" <qc...@gmail.com> 写道:
>Hi
>
>这个作业的 application 有起来吗?起来了的话,可以看看 JM
>log,如果没有起来,可以从提交客户端的那看看有没有更详细的提交日志。日志目录默认在 `/opt/flink-1.10.0/log` 下面
>
>Best,
>Congxian
>
>
>Zhou Zach <wa...@163.com> 于2020年6月19日周五 下午8:15写道:
>
>> 我是per job模式,不是yarn session模式啊
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
>> >那得重启yarn session,再把作业提交上去
>> >
>> >On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
>> >
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> 用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink
>> job
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
>> >> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
>> >> >
>> >> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
>> >> >
>> >> >>
>> >> >>
>> >> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
>> >> >> akka.client.timeout: 600000000
>> >> >> akka.ask.timeout: 6000000
>> >> >>
>> >> >> 有大佬知道是什么原因吗
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >用yarn application kill flink job后,
>> >> >> >执行/opt/flink-1.10.0/bin/flink run -s
>> >> >>
>> >>
>> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
>> >> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>> >> >> /data/warehouse/streaming/data-flow-1.0.jar
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >2020-06-19 14:39:54,563 INFO
>> >> >>
>> >>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
>> >> >> - State change: CONNECTED
>> >> >> >2020-06-19 14:39:54,664 INFO
>> >> >>
>> >>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >> >> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >> >> >2020-06-19 14:40:24,728 INFO
>> >> >>
>> >>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >> >> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >> >> >2020-06-19 14:40:24,729 INFO
>> >> >>
>> >>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
>> >> >> - backgroundOperationsLoop exiting
>> >> >> >2020-06-19 14:40:24,733 INFO
>> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
>> >> >> Session: 0x272b776faca2414 closed
>> >> >> >2020-06-19 14:40:24,733 INFO
>> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
>> >> >> EventThread shut down for session: 0x272b776faca2414
>> >> >> >2020-06-19 14:40:24,734 ERROR
>> org.apache.flink.client.cli.CliFrontend
>> >> >>                    - Error while running the command.
>> >> >> >org.apache.flink.client.program.ProgramInvocationException: The main
>> >> >> method caused an error: java.util.concurrent.ExecutionException:
>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> submit
>> >> >> JobGraph.
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>> >> >> >        at
>> >> >>
>> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>> >> >> >        at
>> >> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>> >> >> >        at java.security.AccessController.doPrivileged(Native
>> Method)
>> >> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>> >> >> >        at
>> >> >> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>> >> >> >Caused by: java.lang.RuntimeException:
>> >> >> java.util.concurrent.ExecutionException:
>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> submit
>> >> >> JobGraph.
>> >> >> >        at
>> >> >> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>> >> >> >        at
>> >> >>
>> >>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>> >> >> >        at
>> >> >>
>> >>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>> >> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>> Method)
>> >> >> >        at
>> >> >>
>> >>
>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> >> >> >        at
>> >> >>
>> >>
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> >> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>> >> >> >        ... 11 more
>> >> >> >Caused by: java.util.concurrent.ExecutionException:
>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> submit
>> >> >> JobGraph.
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>> >> >> >        at
>> >> >>
>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>> >> >> >        ... 23 more
>> >> >> >Caused by: org.apache.flink.runtime.client.JobSubmissionException:
>> >> Failed
>> >> >> to submit JobGraph.
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>> >> >> >        at
>> >> >>
>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>> >> >> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>> >> >> >        at java.lang.Thread.run(Thread.java:748)
>> >> >> >Caused by: java.util.concurrent.TimeoutException
>> >> >> >
>> >> >>
>> >> >
>> >> >
>> >> >--
>> >> >Best regards!
>> >> >Rui Li
>> >>
>> >
>> >
>> >--
>> >Best regards!
>> >Rui Li
>>





 

Re:Re: Re: Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.
用yarn application kill flink job把yarn的application杀掉后,
执行/opt/flink-1.10.0/bin/flink run -s hdfs://nameservice1:8020/user/flink10/checkpoints/f1b6f5392cd5053db155e709ffe9f871/chk-15/_metadata  dataflow.sql.FromKafkaSinkJdbcForCountPerSecond /data/warehouse/streaming/data-flow-1.0.jar,启动不起来,/opt/flink-1.10.0/log日志上传到附件了。。。


执行/opt/flink-1.10.0/bin/flink run -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond -m yarn-cluster -yjm 1024m -ytm 8192m -p 2 -ys 4 -ynm UV -d data-flow-1.0.jar,是可以正常启动的,就是带上-s参数报错。。。



















在 2020-06-21 09:16:45,"Congxian Qiu" <qc...@gmail.com> 写道:
>Hi
>
>这个作业的 application 有起来吗?起来了的话,可以看看 JM
>log,如果没有起来,可以从提交客户端的那看看有没有更详细的提交日志。日志目录默认在 `/opt/flink-1.10.0/log` 下面
>
>Best,
>Congxian
>
>
>Zhou Zach <wa...@163.com> 于2020年6月19日周五 下午8:15写道:
>
>> 我是per job模式,不是yarn session模式啊
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
>> >那得重启yarn session,再把作业提交上去
>> >
>> >On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
>> >
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> 用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink
>> job
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
>> >> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
>> >> >
>> >> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
>> >> >
>> >> >>
>> >> >>
>> >> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
>> >> >> akka.client.timeout: 600000000
>> >> >> akka.ask.timeout: 6000000
>> >> >>
>> >> >> 有大佬知道是什么原因吗
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >用yarn application kill flink job后,
>> >> >> >执行/opt/flink-1.10.0/bin/flink run -s
>> >> >>
>> >>
>> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
>> >> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>> >> >> /data/warehouse/streaming/data-flow-1.0.jar
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >
>> >> >> >2020-06-19 14:39:54,563 INFO
>> >> >>
>> >>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
>> >> >> - State change: CONNECTED
>> >> >> >2020-06-19 14:39:54,664 INFO
>> >> >>
>> >>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >> >> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >> >> >2020-06-19 14:40:24,728 INFO
>> >> >>
>> >>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >> >> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >> >> >2020-06-19 14:40:24,729 INFO
>> >> >>
>> >>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
>> >> >> - backgroundOperationsLoop exiting
>> >> >> >2020-06-19 14:40:24,733 INFO
>> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
>> >> >> Session: 0x272b776faca2414 closed
>> >> >> >2020-06-19 14:40:24,733 INFO
>> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
>> >> >> EventThread shut down for session: 0x272b776faca2414
>> >> >> >2020-06-19 14:40:24,734 ERROR
>> org.apache.flink.client.cli.CliFrontend
>> >> >>                    - Error while running the command.
>> >> >> >org.apache.flink.client.program.ProgramInvocationException: The main
>> >> >> method caused an error: java.util.concurrent.ExecutionException:
>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> submit
>> >> >> JobGraph.
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>> >> >> >        at
>> >> >>
>> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>> >> >> >        at
>> >> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>> >> >> >        at java.security.AccessController.doPrivileged(Native
>> Method)
>> >> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>> >> >> >        at
>> >> >> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>> >> >> >Caused by: java.lang.RuntimeException:
>> >> >> java.util.concurrent.ExecutionException:
>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> submit
>> >> >> JobGraph.
>> >> >> >        at
>> >> >> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>> >> >> >        at
>> >> >>
>> >>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>> >> >> >        at
>> >> >>
>> >>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>> >> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>> Method)
>> >> >> >        at
>> >> >>
>> >>
>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> >> >> >        at
>> >> >>
>> >>
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> >> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>> >> >> >        ... 11 more
>> >> >> >Caused by: java.util.concurrent.ExecutionException:
>> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
>> submit
>> >> >> JobGraph.
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>> >> >> >        at
>> >> >>
>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>> >> >> >        ... 23 more
>> >> >> >Caused by: org.apache.flink.runtime.client.JobSubmissionException:
>> >> Failed
>> >> >> to submit JobGraph.
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>> >> >> >        at
>> >> >>
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>> >> >> >        at
>> >> >>
>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>> >> >> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>> >> >> >        at
>> >> >>
>> >>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>> >> >> >        at java.lang.Thread.run(Thread.java:748)
>> >> >> >Caused by: java.util.concurrent.TimeoutException
>> >> >> >
>> >> >>
>> >> >
>> >> >
>> >> >--
>> >> >Best regards!
>> >> >Rui Li
>> >>
>> >
>> >
>> >--
>> >Best regards!
>> >Rui Li
>>

Re: Re: Re: flink run from checkpoint failed

Posted by Congxian Qiu <qc...@gmail.com>.
Hi

这个作业的 application 有起来吗?起来了的话,可以看看 JM log;
如果没有起来,可以在提交作业的客户端那边看看有没有更详细的提交日志。日志目录默认在 `/opt/flink-1.10.0/log` 下面。

Best,
Congxian


Zhou Zach <wa...@163.com> 于2020年6月19日周五 下午8:15写道:

> 我是per job模式,不是yarn session模式啊
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
> At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
> >那得重启yarn session,再把作业提交上去
> >
> >On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
> >
> >>
> >>
> >>
> >>
> >>
> >>
> >> 用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink
> job
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
> >> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
> >> >
> >> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
> >> >
> >> >>
> >> >>
> >> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
> >> >> akka.client.timeout: 600000000
> >> >> akka.ask.timeout: 6000000
> >> >>
> >> >> 有大佬知道是什么原因吗
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> >用yarn application kill flink job后,
> >> >> >执行/opt/flink-1.10.0/bin/flink run -s
> >> >>
> >>
> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
> >> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
> >> >> /data/warehouse/streaming/data-flow-1.0.jar
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> >2020-06-19 14:39:54,563 INFO
> >> >>
> >>
> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
> >> >> - State change: CONNECTED
> >> >> >2020-06-19 14:39:54,664 INFO
> >> >>
> >>
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> >> >> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
> >> >> >2020-06-19 14:40:24,728 INFO
> >> >>
> >>
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> >> >> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
> >> >> >2020-06-19 14:40:24,729 INFO
> >> >>
> >>
> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
> >> >> - backgroundOperationsLoop exiting
> >> >> >2020-06-19 14:40:24,733 INFO
> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
> >> >> Session: 0x272b776faca2414 closed
> >> >> >2020-06-19 14:40:24,733 INFO
> >> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
> >> >> EventThread shut down for session: 0x272b776faca2414
> >> >> >2020-06-19 14:40:24,734 ERROR
> org.apache.flink.client.cli.CliFrontend
> >> >>                    - Error while running the command.
> >> >> >org.apache.flink.client.program.ProgramInvocationException: The main
> >> >> method caused an error: java.util.concurrent.ExecutionException:
> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
> submit
> >> >> JobGraph.
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
> >> >> >        at
> >> >>
> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
> >> >> >        at
> >> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
> >> >> >        at java.security.AccessController.doPrivileged(Native
> Method)
> >> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
> >> >> >        at
> >> >>
> >>
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
> >> >> >        at
> >> >> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
> >> >> >Caused by: java.lang.RuntimeException:
> >> >> java.util.concurrent.ExecutionException:
> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
> submit
> >> >> JobGraph.
> >> >> >        at
> >> >> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
> >> >> >        at
> >> >>
> >>
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
> >> >> >        at
> >> >>
> >>
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
> >> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
> >> >> >        at
> >> >>
> >>
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >> >> >        at
> >> >>
> >>
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
> >> >> >        ... 11 more
> >> >> >Caused by: java.util.concurrent.ExecutionException:
> >> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to
> submit
> >> >> JobGraph.
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> >> >> >        at
> >> >>
> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
> >> >> >        ... 23 more
> >> >> >Caused by: org.apache.flink.runtime.client.JobSubmissionException:
> >> Failed
> >> >> to submit JobGraph.
> >> >> >        at
> >> >>
> >>
> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
> >> >> >        at
> >> >>
> >>
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
> >> >> >        at
> >> >>
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> >> >> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> >> >> >        at
> >> >>
> >>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> >> >> >        at java.lang.Thread.run(Thread.java:748)
> >> >> >Caused by: java.util.concurrent.TimeoutException
> >> >> >
> >> >>
> >> >
> >> >
> >> >--
> >> >Best regards!
> >> >Rui Li
> >>
> >
> >
> >--
> >Best regards!
> >Rui Li
>

Re:Re: Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.
我是per job模式,不是yarn session模式啊

















At 2020-06-19 20:06:47, "Rui Li" <li...@gmail.com> wrote:
>那得重启yarn session,再把作业提交上去
>
>On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:
>
>>
>>
>>
>>
>>
>>
>> 用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink job
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
>> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
>> >
>> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
>> >
>> >>
>> >>
>> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
>> >> akka.client.timeout: 600000000
>> >> akka.ask.timeout: 6000000
>> >>
>> >> 有大佬知道是什么原因吗
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >用yarn application kill flink job后,
>> >> >执行/opt/flink-1.10.0/bin/flink run -s
>> >>
>> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
>> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>> >> /data/warehouse/streaming/data-flow-1.0.jar
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >
>> >> >2020-06-19 14:39:54,563 INFO
>> >>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
>> >> - State change: CONNECTED
>> >> >2020-06-19 14:39:54,664 INFO
>> >>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >> >2020-06-19 14:40:24,728 INFO
>> >>
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> >> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >> >2020-06-19 14:40:24,729 INFO
>> >>
>> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
>> >> - backgroundOperationsLoop exiting
>> >> >2020-06-19 14:40:24,733 INFO
>> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
>> >> Session: 0x272b776faca2414 closed
>> >> >2020-06-19 14:40:24,733 INFO
>> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
>> >> EventThread shut down for session: 0x272b776faca2414
>> >> >2020-06-19 14:40:24,734 ERROR org.apache.flink.client.cli.CliFrontend
>> >>                    - Error while running the command.
>> >> >org.apache.flink.client.program.ProgramInvocationException: The main
>> >> method caused an error: java.util.concurrent.ExecutionException:
>> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
>> >> JobGraph.
>> >> >        at
>> >>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>> >> >        at
>> >>
>> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>> >> >        at
>> >> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>> >> >        at
>> >>
>> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>> >> >        at
>> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>> >> >        at
>> >>
>> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>> >> >        at
>> >>
>> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>> >> >        at java.security.AccessController.doPrivileged(Native Method)
>> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
>> >> >        at
>> >>
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>> >> >        at
>> >>
>> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>> >> >        at
>> >> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>> >> >Caused by: java.lang.RuntimeException:
>> >> java.util.concurrent.ExecutionException:
>> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
>> >> JobGraph.
>> >> >        at
>> >> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>> >> >        at
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>> >> >        at
>> >>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>> >> >        at
>> >>
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>> >> >        at
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>> >> >        at
>> >>
>> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>> >> >        at
>> >>
>> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>> >> >        at
>> >>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>> >> >        at
>> >>
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> >> >        at
>> >>
>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> >> >        at
>> >>
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
>> >> >        at
>> >>
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>> >> >        ... 11 more
>> >> >Caused by: java.util.concurrent.ExecutionException:
>> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
>> >> JobGraph.
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>> >> >        at
>> >> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>> >> >        at
>> >>
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>> >> >        ... 23 more
>> >> >Caused by: org.apache.flink.runtime.client.JobSubmissionException:
>> Failed
>> >> to submit JobGraph.
>> >> >        at
>> >>
>> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >> >        at
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >> >        at
>> >>
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >> >        at
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>> >> >        at
>> >>
>> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>> >> >        at
>> >>
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>> >> >        at
>> >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>> >> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>> >> >        at
>> >>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>> >> >        at
>> >>
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>> >> >        at
>> >>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>> >> >        at
>> >>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>> >> >        at java.lang.Thread.run(Thread.java:748)
>> >> >Caused by: java.util.concurrent.TimeoutException
>> >> >
>> >>
>> >
>> >
>> >--
>> >Best regards!
>> >Rui Li
>>
>
>
>-- 
>Best regards!
>Rui Li

Re: Re: flink run from checkpoint failed

Posted by Rui Li <li...@gmail.com>.
那得重启yarn session,再把作业提交上去

On Fri, Jun 19, 2020 at 6:22 PM Zhou Zach <wa...@163.com> wrote:

>
>
>
>
>
>
> 用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink job
>
>
>
>
>
>
>
>
>
>
>
> 在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
> >用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
> >
> >On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
> >
> >>
> >>
> >> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
> >> akka.client.timeout: 600000000
> >> akka.ask.timeout: 6000000
> >>
> >> 有大佬知道是什么原因吗
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
> >> >
> >> >
> >> >
> >> >
> >> >用yarn application kill flink job后,
> >> >执行/opt/flink-1.10.0/bin/flink run -s
> >>
> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
> >> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
> >> /data/warehouse/streaming/data-flow-1.0.jar
> >> >
> >> >
> >> >
> >> >
> >> >
> >> >
> >> >
> >> >
> >> >2020-06-19 14:39:54,563 INFO
> >>
> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
> >> - State change: CONNECTED
> >> >2020-06-19 14:39:54,664 INFO
> >>
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> >> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
> >> >2020-06-19 14:40:24,728 INFO
> >>
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> >> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
> >> >2020-06-19 14:40:24,729 INFO
> >>
> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
> >> - backgroundOperationsLoop exiting
> >> >2020-06-19 14:40:24,733 INFO
> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
> >> Session: 0x272b776faca2414 closed
> >> >2020-06-19 14:40:24,733 INFO
> >> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
> >> EventThread shut down for session: 0x272b776faca2414
> >> >2020-06-19 14:40:24,734 ERROR org.apache.flink.client.cli.CliFrontend
> >>                    - Error while running the command.
> >> >org.apache.flink.client.program.ProgramInvocationException: The main
> >> method caused an error: java.util.concurrent.ExecutionException:
> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
> >> JobGraph.
> >> >        at
> >>
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
> >> >        at
> >>
> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
> >> >        at
> >> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
> >> >        at
> >>
> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
> >> >        at
> >> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
> >> >        at
> >>
> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
> >> >        at
> >>
> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
> >> >        at java.security.AccessController.doPrivileged(Native Method)
> >> >        at javax.security.auth.Subject.doAs(Subject.java:422)
> >> >        at
> >>
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
> >> >        at
> >>
> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
> >> >        at
> >> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
> >> >Caused by: java.lang.RuntimeException:
> >> java.util.concurrent.ExecutionException:
> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
> >> JobGraph.
> >> >        at
> >> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
> >> >        at
> >>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
> >> >        at
> >>
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
> >> >        at
> >>
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
> >> >        at
> >>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
> >> >        at
> >>
> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
> >> >        at
> >>
> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
> >> >        at
> >>
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
> >> >        at
> >>
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
> >> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> >> >        at
> >>
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >> >        at
> >>
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >> >        at java.lang.reflect.Method.invoke(Method.java:498)
> >> >        at
> >>
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
> >> >        ... 11 more
> >> >Caused by: java.util.concurrent.ExecutionException:
> >> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
> >> JobGraph.
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> >> >        at
> >> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
> >> >        at
> >>
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
> >> >        ... 23 more
> >> >Caused by: org.apache.flink.runtime.client.JobSubmissionException:
> Failed
> >> to submit JobGraph.
> >> >        at
> >>
> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >> >        at
> >>
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >> >        at
> >>
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >> >        at
> >>
> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
> >> >        at
> >>
> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
> >> >        at
> >>
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
> >> >        at
> >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> >> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> >> >        at
> >>
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
> >> >        at
> >>
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
> >> >        at
> >>
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> >> >        at
> >>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> >> >        at java.lang.Thread.run(Thread.java:748)
> >> >Caused by: java.util.concurrent.TimeoutException
> >> >
> >>
> >
> >
> >--
> >Best regards!
> >Rui Li
>


-- 
Best regards!
Rui Li

Re:Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.





用yarn application kill flink job把yarn的application杀掉了,杀掉后yarn没有重启flink job











在 2020-06-19 17:54:45,"Rui Li" <li...@gmail.com> 写道:
>用yarn application kill flink job是说把yarn的application杀掉了吗?杀掉以后有没有重启呀
>
>On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:
>
>>
>>
>> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
>> akka.client.timeout: 600000000
>> akka.ask.timeout: 6000000
>>
>> 有大佬知道是什么原因吗
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>> >
>> >
>> >
>> >
>> >用yarn application kill flink job后,
>> >执行/opt/flink-1.10.0/bin/flink run -s
>> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
>> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
>> /data/warehouse/streaming/data-flow-1.0.jar
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> >2020-06-19 14:39:54,563 INFO
>> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
>> - State change: CONNECTED
>> >2020-06-19 14:39:54,664 INFO
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >2020-06-19 14:40:24,728 INFO
>> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
>> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>> >2020-06-19 14:40:24,729 INFO
>> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
>> - backgroundOperationsLoop exiting
>> >2020-06-19 14:40:24,733 INFO
>> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
>> Session: 0x272b776faca2414 closed
>> >2020-06-19 14:40:24,733 INFO
>> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
>> EventThread shut down for session: 0x272b776faca2414
>> >2020-06-19 14:40:24,734 ERROR org.apache.flink.client.cli.CliFrontend
>>                    - Error while running the command.
>> >org.apache.flink.client.program.ProgramInvocationException: The main
>> method caused an error: java.util.concurrent.ExecutionException:
>> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
>> JobGraph.
>> >        at
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>> >        at
>> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>> >        at
>> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>> >        at
>> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>> >        at
>> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>> >        at
>> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>> >        at
>> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>> >        at java.security.AccessController.doPrivileged(Native Method)
>> >        at javax.security.auth.Subject.doAs(Subject.java:422)
>> >        at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>> >        at
>> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>> >        at
>> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>> >Caused by: java.lang.RuntimeException:
>> java.util.concurrent.ExecutionException:
>> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
>> JobGraph.
>> >        at
>> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>> >        at
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>> >        at
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>> >        at
>> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>> >        at
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>> >        at
>> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>> >        at
>> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>> >        at
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>> >        at
>> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> >        at
>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> >        at
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> >        at java.lang.reflect.Method.invoke(Method.java:498)
>> >        at
>> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>> >        ... 11 more
>> >Caused by: java.util.concurrent.ExecutionException:
>> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
>> JobGraph.
>> >        at
>> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>> >        at
>> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>> >        at
>> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>> >        ... 23 more
>> >Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed
>> to submit JobGraph.
>> >        at
>> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>> >        at
>> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>> >        at
>> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>> >        at
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >        at
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >        at
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>> >        at
>> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>> >        at
>> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>> >        at
>> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>> >        at
>> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>> >        at
>> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>> >        at
>> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>> >        at
>> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>> >        at
>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>> >        at
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>> >        at
>> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>> >        at
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>> >        at
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>> >        at java.lang.Thread.run(Thread.java:748)
>> >Caused by: java.util.concurrent.TimeoutException
>> >
>>
>
>
>-- 
>Best regards!
>Rui Li

Re: flink run from checkpoint failed

Posted by Rui Li <li...@gmail.com>.
“用yarn application kill flink job”是说把yarn的application杀掉了吗?杀掉以后有没有重启呀?

On Fri, Jun 19, 2020 at 4:09 PM Zhou Zach <wa...@163.com> wrote:

>
>
> 在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
> akka.client.timeout: 600000000
> akka.ask.timeout: 6000000
>
> 有大佬知道是什么原因吗
>
>
>
>
>
>
>
>
>
>
>
>
>
>
> 在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
> >
> >
> >
> >
> >用yarn application kill flink job后,
> >执行/opt/flink-1.10.0/bin/flink run -s
> /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata
> -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond
> /data/warehouse/streaming/data-flow-1.0.jar
> >
> >
> >
> >
> >
> >
> >
> >
> >2020-06-19 14:39:54,563 INFO
> org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager
> - State change: CONNECTED
> >2020-06-19 14:39:54,664 INFO
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
> >2020-06-19 14:40:24,728 INFO
> org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  -
> Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
> >2020-06-19 14:40:24,729 INFO
> org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl
> - backgroundOperationsLoop exiting
> >2020-06-19 14:40:24,733 INFO
> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  -
> Session: 0x272b776faca2414 closed
> >2020-06-19 14:40:24,733 INFO
> org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  -
> EventThread shut down for session: 0x272b776faca2414
> >2020-06-19 14:40:24,734 ERROR org.apache.flink.client.cli.CliFrontend
>                    - Error while running the command.
> >org.apache.flink.client.program.ProgramInvocationException: The main
> method caused an error: java.util.concurrent.ExecutionException:
> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
> JobGraph.
> >        at
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
> >        at
> org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
> >        at
> org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
> >        at
> org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
> >        at
> org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
> >        at
> org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
> >        at
> org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
> >        at java.security.AccessController.doPrivileged(Native Method)
> >        at javax.security.auth.Subject.doAs(Subject.java:422)
> >        at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
> >        at
> org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
> >        at
> org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
> >Caused by: java.lang.RuntimeException:
> java.util.concurrent.ExecutionException:
> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
> JobGraph.
> >        at
> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
> >        at
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
> >        at
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
> >        at
> org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
> >        at
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
> >        at
> org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
> >        at
> org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
> >        at
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
> >        at
> cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
> >        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> >        at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >        at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >        at java.lang.reflect.Method.invoke(Method.java:498)
> >        at
> org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
> >        ... 11 more
> >Caused by: java.util.concurrent.ExecutionException:
> org.apache.flink.runtime.client.JobSubmissionException: Failed to submit
> JobGraph.
> >        at
> java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
> >        at
> java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
> >        at
> org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
> >        ... 23 more
> >Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed
> to submit JobGraph.
> >        at
> org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
> >        at
> java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
> >        at
> java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
> >        at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >        at
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >        at
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
> >        at
> java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
> >        at
> java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
> >        at
> java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
> >        at
> java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
> >        at
> org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
> >        at
> org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
> >        at
> org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
> >        at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> >        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> >        at
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
> >        at
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
> >        at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> >        at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> >        at java.lang.Thread.run(Thread.java:748)
> >Caused by: java.util.concurrent.TimeoutException
> >
>


-- 
Best regards!
Rui Li

Re: flink run from checkpoint failed

Posted by Zhou Zach <wa...@163.com>.

在flink-1.10.0/conf/flink-conf.yaml中加了下面两个超时参数,不起作用
akka.client.timeout: 600000000
akka.ask.timeout: 6000000

有大佬知道是什么原因吗?














在 2020-06-19 14:57:05,"Zhou Zach" <wa...@163.com> 写道:
>
>
>
>
>用yarn application kill flink job后,
>执行/opt/flink-1.10.0/bin/flink run -s /user/flink10/checkpoints/69e450574d8520ac5961e20a6fc4798a/chk-18/_metadata -d -c dataflow.sql.FromKafkaSinkJdbcForCountPerSecond  /data/warehouse/streaming/data-flow-1.0.jar
>
>
>
>
>
>
>
>
>2020-06-19 14:39:54,563 INFO  org.apache.flink.shaded.curator.org.apache.curator.framework.state.ConnectionStateManager  - State change: CONNECTED
>2020-06-19 14:39:54,664 INFO  org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  - Starting ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>2020-06-19 14:40:24,728 INFO  org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService  - Stopping ZooKeeperLeaderRetrievalService /leader/rest_server_lock.
>2020-06-19 14:40:24,729 INFO  org.apache.flink.shaded.curator.org.apache.curator.framework.imps.CuratorFrameworkImpl  - backgroundOperationsLoop exiting
>2020-06-19 14:40:24,733 INFO  org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ZooKeeper  - Session: 0x272b776faca2414 closed
>2020-06-19 14:40:24,733 INFO  org.apache.flink.shaded.zookeeper.org.apache.zookeeper.ClientCnxn  - EventThread shut down for session: 0x272b776faca2414
>2020-06-19 14:40:24,734 ERROR org.apache.flink.client.cli.CliFrontend                       - Error while running the command.
>org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
>        at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:335)
>        at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:205)
>        at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:138)
>        at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:664)
>        at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:213)
>        at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:895)
>        at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:968)
>        at java.security.AccessController.doPrivileged(Native Method)
>        at javax.security.auth.Subject.doAs(Subject.java:422)
>        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
>        at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
>        at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:968)
>Caused by: java.lang.RuntimeException: java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
>        at org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:199)
>        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1741)
>        at org.apache.flink.streaming.api.environment.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:94)
>        at org.apache.flink.streaming.api.environment.StreamContextEnvironment.execute(StreamContextEnvironment.java:63)
>        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1620)
>        at org.apache.flink.table.planner.delegation.StreamExecutor.execute(StreamExecutor.java:42)
>        at org.apache.flink.table.api.internal.TableEnvironmentImpl.execute(TableEnvironmentImpl.java:643)
>        at cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond$.main(FromKafkaSinkJdbcForCountPerSecond.scala:120)
>        at cn.ibobei.qile.dataflow.sql.FromKafkaSinkJdbcForCountPerSecond.main(FromKafkaSinkJdbcForCountPerSecond.scala)
>        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>        at java.lang.reflect.Method.invoke(Method.java:498)
>        at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:321)
>        ... 11 more
>Caused by: java.util.concurrent.ExecutionException: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
>        at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
>        at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
>        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1736)
>        ... 23 more
>Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
>        at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:359)
>        at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:870)
>        at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:852)
>        at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>        at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>        at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$8(FutureUtils.java:274)
>        at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:760)
>        at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:736)
>        at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
>        at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1977)
>        at org.apache.flink.runtime.concurrent.FutureUtils$Timeout.run(FutureUtils.java:999)
>        at org.apache.flink.runtime.concurrent.DirectExecutorService.execute(DirectExecutorService.java:211)
>        at org.apache.flink.runtime.concurrent.FutureUtils.lambda$orTimeout$14(FutureUtils.java:427)
>        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>        at java.lang.Thread.run(Thread.java:748)
>Caused by: java.util.concurrent.TimeoutException
>