You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user-zh@flink.apache.org by 陈卓宇 <25...@qq.com.INVALID> on 2022/04/14 01:25:41 UTC
Native Kubernetes 模式提交任务异常
按照官方文档进行以下操作:
# (1) Start Kubernetes session
$ ./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster
# (2) Submit example job
$ ./bin/flink run \
    --target kubernetes-session \
    -Dkubernetes.cluster-id=my-first-flink-cluster \
    ./examples/streaming/TopSpeedWindowing.jar
报错:org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: Failed to execute job 'CarTopSpeedWindowingExample'.
at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:360)
at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:213)
at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:114)
at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:816)
at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:248)
at org.apache.flink.client.cli.CliFrontend.parseAndRun(CliFrontend.java:1058)
at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:1136)
at org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:1136)
Caused by: org.apache.flink.util.FlinkException: Failed to execute job 'CarTopSpeedWindowingExample'.
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1918)
at org.apache.flink.client.program.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:135)
at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:76)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1782)
at org.apache.flink.streaming.examples.windowing.TopSpeedWindowing.main(TopSpeedWindowing.java:99)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:343)
... 8 more
Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:400)
at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:884)
at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:866)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:390)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
at org.apache.flink.runtime.rest.RestClient.lambda$submitRequest$1(RestClient.java:430)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:577)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:570)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:549)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:490)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:615)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:608)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.flink.runtime.concurrent.FutureUtils$RetryException: Could not complete the operation. Number of retries has been exhausted.
at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:386)
... 21 more
Caused by: java.util.concurrent.CompletionException: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:957)
at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
... 19 more
Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
Caused by: java.net.ConnectException: Connection refused
at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
at org.apache.flink.shaded.netty4.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(Thread.java:748)
请教大佬我该如何解决此问题?
卓
Re: Native Kubernetes 模式提交任务异常
Posted by huweihua <hu...@gmail.com>.
更正:
1. Cluster IP 只能在 Flink 内部使用 --> Cluster IP 只能在 Kubernetes 内部使用
> 2022年4月15日 下午4:12,huweihua <hu...@gmail.com> 写道:
>
> Flink
Re: Native Kubernetes 模式提交任务异常
Posted by huweihua <hu...@gmail.com>.
Hi, 卓宇,yidan
当前 Flink service 对外暴露的方式有 3 种[1],可以参考判断当前环境适合使用哪种
1. ClusterIP
Cluster IP 只能在 Flink 内部使用,如果想在外部访问,需要通过 kubectl port-forward 进行流量转发。这种模式下,Flink 拿到的 JobManager 地址为: {clusterId}-rest.{k8s-namespace}:{rest port}
ClusterIP 在 1.15 版本调整为默认模式
2. NodePort
NodePort 会在每一台 K8S 节点上转发 service 流量,这种模式下 Flink 拿到的 JobManager 地址为 {k8s node ip} : {node port}
注意: 在 Flink 1.14 之前的版本,{k8s node ip} 默认取 K8S master ip,如果集群使用了 vip 则会访问失败,在 1.14 版本进行了适配[2]。
3. LoadBalancer
LoadBalancer 依赖云服务商提供的 load balancer, 这种模式下 Flink 拿到的 JobManager 地址比较特殊:
1. 如果当前 K8S 集群接入了 load balancer,由于 load balancer 需要一定的准备时间,所以有两种可能
a. client 侧在获取 JobManager 地址时, load balancer 还未准备好, 此时拿到的地址是 {k8s node ip} : {rest port}, 这个地址是不能被外界访问的,需要后续人工通过 kubectl get service <cluster-id>-rest 来查询 external ip
b. client 侧获取 JobManager 地址时,load balancer 已经就绪,此时拿到的地址是 {external ip} : {rest port}
2. 如果当前 K8S 集群不支持 load balancer,此时获取到的 JM 地址和 1.a 一致,也无法通过外界访问。
版本差异:
1.15 之前的版本默认使用了 LoadBalancer
1.15 开始将默认模式调整为 ClusterIP [3]
[1]https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/native_kubernetes/#accessing-flinks-web-ui
[2]https://issues.apache.org/jira/browse/FLINK-23507
[3]https://issues.apache.org/jira/browse/FLINK-24503
> 2022年4月15日 下午3:49,陈卓宇 <25...@qq.com.INVALID> 写道:
>
> 要开启service服务,将native-session的集群设置为nodePort,绑定到node上面,在启动脚本后可以通过-D参数来进行相关的配置
>
>
> 陈卓宇
>
>
>
>
>
>
>
> ------------------ 原始邮件 ------------------
> 发件人: "user-zh" <hinobleyd@gmail.com <ma...@gmail.com>>;
> 发送时间: 2022年4月15日(星期五) 下午2:40
> 收件人: "user-zh"<user-zh@flink.apache.org <ma...@flink.apache.org>>;
>
> 主题: Re: Native Kubernetes 模式提交任务异常
>
>
>
> 我之前也是类似问题,还没解决,1.14版本,没什么参数设置,就直接执行那个脚本就可以创建集群。创建后给了个地址不能访问,我是自己查了clusterIp才可以访问,创建后显示的那个ip是不能用的。
>
> yu'an huang <h.yuan667@gmail.com <ma...@gmail.com>> 于2022年4月14日周四 10:31写道:
> >
> > Hi 卓宇,
> >
> > 如exception中提到的:Connection refused: rancher-test.ziroom.com/10.30.238.18:8081 <http://rancher-test.ziroom.com/10.30.238.18:8081>。由于session cluster地址不可访问所以job graph提交失败了。一般原因是由于Kubernetes集群存在网络隔离,需要适当的设置service才能访问和提交job。
> >
> > 请问你是如何设置的你的session cluster呢,可以访问UI吗?
> > 你的参数kubernetes.rest-service.exposed.type设置的是什么?
> > 你使用Flink的版本又是什么呢?
> >
> >
> >
> >
> >
> >
> > > On 14 Apr 2022, at 9:25 AM, 陈卓宇 <2572805166@qq.com.INVALID <ma...@qq.com.INVALID>> wrote:
> > >
> > > 按照官方文档进行以下操作:
> > > # (1) Start Kubernetes session $ ./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster # (2) Submit example job $ ./bin/flink run \ --target kubernetes-session \ -Dkubernetes.cluster-id=my-first-flink-cluster \ ./examples/streaming/TopSpeedWindowing.jar
> > > 报错:org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: Failed to execute job 'CarTopSpeedWindowingExample'.
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:360)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:213)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:114)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:816)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:248)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.parseAndRun(CliFrontend.java:1058)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:1136)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:1136)
> > > Caused by: org.apache.flink.util.FlinkException: Failed to execute job 'CarTopSpeedWindowingExample'.
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1918)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:135)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:76)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1782)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.streaming.examples.windowing.TopSpeedWindowing.main(TopSpeedWindowing.java:99)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.lang.reflect.Method.invoke(Method.java:498)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:343)
> > > &nbsp; &nbsp; &nbsp; &nbsp;... 8 more
> > > Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:400)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:884)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:866)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:390)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.rest.RestClient.lambda$submitRequest$1(RestClient.java:430)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:577)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:570)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:549)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:490)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:615)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:608)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.lang.Thread.run(Thread.java:748)
> > > Caused by: org.apache.flink.runtime.concurrent.FutureUtils$RetryException: Could not complete the operation. Number of retries has been exhausted.
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:386)
> > > &nbsp; &nbsp; &nbsp; &nbsp;... 21 more
> > > Caused by: java.util.concurrent.CompletionException: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:957)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
> > > &nbsp; &nbsp; &nbsp; &nbsp;... 19 more
> > > Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> > > Caused by: java.net.ConnectException: Connection refused
> > > &nbsp; &nbsp; &nbsp; &nbsp;at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> > > &nbsp; &nbsp; &nbsp; &nbsp;at java.lang.Thread.run(Thread.java:748)
> > >
> > > 请教大佬我该如何解决此问题?
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > 卓
> > >
> > >
> > >
> >
回复: Native Kubernetes 模式提交任务异常
Posted by 陈卓宇 <25...@qq.com.INVALID>.
要开启service服务,将native-session的集群设置为nodePort(例如 -Dkubernetes.rest-service.exposed.type=NodePort),绑定到node上面,在启动脚本后可以通过-D参数来进行相关的配置
陈卓宇
------------------ 原始邮件 ------------------
发件人: "user-zh" <hinobleyd@gmail.com>;
发送时间: 2022年4月15日(星期五) 下午2:40
收件人: "user-zh"<user-zh@flink.apache.org>;
主题: Re: Native Kubernetes 模式提交任务异常
我之前也是类似问题,还没解决,1.14版本,没什么参数设置,就直接执行那个脚本就可以创建集群。创建后给了个地址不能访问,我是自己查了clusterIp才可以访问,创建后显示的那个ip是不能用的。
yu'an huang <h.yuan667@gmail.com> 于2022年4月14日周四 10:31写道:
>
> Hi 卓宇,
>
> 如exception中提到的:Connection refused: rancher-test.ziroom.com/10.30.238.18:8081 <http://rancher-test.ziroom.com/10.30.238.18:8081>。由于session cluster地址不可访问所以job graph提交失败了。一般原因是由于Kubernetes集群存在网络隔离,需要适当的设置service才能访问和提交job。
>
> 请问你是如何设置的你的session cluster呢,可以访问UI吗?
> 你的参数kubernetes.rest-service.exposed.type设置的是什么?
> 你使用Flink的版本又是什么呢?
>
>
>
>
>
>
> > On 14 Apr 2022, at 9:25 AM, 陈卓宇 <2572805166@qq.com.INVALID> wrote:
> >
> > 按照官方文档进行以下操作:
> > # (1) Start Kubernetes session $ ./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster # (2) Submit example job $ ./bin/flink run \ --target kubernetes-session \ -Dkubernetes.cluster-id=my-first-flink-cluster \ ./examples/streaming/TopSpeedWindowing.jar
> > 报错:org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: Failed to execute job 'CarTopSpeedWindowingExample'.
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:360)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:213)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:114)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:816)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:248)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.parseAndRun(CliFrontend.java:1058)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:1136)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:1136)
> > Caused by: org.apache.flink.util.FlinkException: Failed to execute job 'CarTopSpeedWindowingExample'.
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1918)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:135)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:76)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1782)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.streaming.examples.windowing.TopSpeedWindowing.main(TopSpeedWindowing.java:99)
> > &nbsp; &nbsp; &nbsp; &nbsp;at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> > &nbsp; &nbsp; &nbsp; &nbsp;at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > &nbsp; &nbsp; &nbsp; &nbsp;at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.lang.reflect.Method.invoke(Method.java:498)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:343)
> > &nbsp; &nbsp; &nbsp; &nbsp;... 8 more
> > Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:400)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:884)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:866)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:390)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.rest.RestClient.lambda$submitRequest$1(RestClient.java:430)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:577)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:570)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:549)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:490)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:615)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:608)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.lang.Thread.run(Thread.java:748)
> > Caused by: org.apache.flink.runtime.concurrent.FutureUtils$RetryException: Could not complete the operation. Number of retries has been exhausted.
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:386)
> > &nbsp; &nbsp; &nbsp; &nbsp;... 21 more
> > Caused by: java.util.concurrent.CompletionException: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:957)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
> > &nbsp; &nbsp; &nbsp; &nbsp;... 19 more
> > Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> > Caused by: java.net.ConnectException: Connection refused
> > &nbsp; &nbsp; &nbsp; &nbsp;at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
> > &nbsp; &nbsp; &nbsp; &nbsp;at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> > &nbsp; &nbsp; &nbsp; &nbsp;at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> > &nbsp; &nbsp; &nbsp; &nbsp;at java.lang.Thread.run(Thread.java:748)
> >
> > 请教大佬我该如何解决此问题?
> >
> >
> >
> >
> >
> >
> >
> >
> >
> > 卓
> >
> >
> >
>
Re: Native Kubernetes 模式提交任务异常
Posted by yidan zhao <hi...@gmail.com>.
我之前也是类似问题,还没解决,1.14版本,没什么参数设置,就直接执行那个脚本就可以创建集群。创建后给了个地址不能访问,我是自己查了clusterIp才可以访问,创建后显示的那个ip是不能用的。
yu'an huang <h....@gmail.com> 于2022年4月14日周四 10:31写道:
>
> Hi 卓宇,
>
> 如exception中提到的:Connection refused: rancher-test.ziroom.com/10.30.238.18:8081 <http://rancher-test.ziroom.com/10.30.238.18:8081>。由于session cluster地址不可访问所以job graph提交失败了。一般原因是由于Kubernetes集群存在网络隔离,需要适当的设置service才能访问和提交job。
>
> 请问你是如何设置的你的session cluster呢,可以访问UI吗?
> 你的参数kubernetes.rest-service.exposed.type设置的是什么?
> 你使用Flink的版本又是什么呢?
>
>
>
>
>
>
> > On 14 Apr 2022, at 9:25 AM, 陈卓宇 <25...@qq.com.INVALID> wrote:
> >
> > 按照官方文档进行以下操作:
> > # (1) Start Kubernetes session $ ./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster # (2) Submit example job $ ./bin/flink run \ --target kubernetes-session \ -Dkubernetes.cluster-id=my-first-flink-cluster \ ./examples/streaming/TopSpeedWindowing.jar
> > 报错:org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: Failed to execute job 'CarTopSpeedWindowingExample'.
> > at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:360)
> > at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:213)
> > at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:114)
> > at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:816)
> > at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:248)
> > at org.apache.flink.client.cli.CliFrontend.parseAndRun(CliFrontend.java:1058)
> > at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:1136)
> > at org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
> > at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:1136)
> > Caused by: org.apache.flink.util.FlinkException: Failed to execute job 'CarTopSpeedWindowingExample'.
> > at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1918)
> > at org.apache.flink.client.program.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:135)
> > at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:76)
> > at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1782)
> > at org.apache.flink.streaming.examples.windowing.TopSpeedWindowing.main(TopSpeedWindowing.java:99)
> > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> > at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > at java.lang.reflect.Method.invoke(Method.java:498)
> > at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:343)
> > ... 8 more
> > Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
> > at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:400)
> > at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:884)
> > at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:866)
> > at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> > at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> > at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:390)
> > at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> > at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> > at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> > at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> > at org.apache.flink.runtime.rest.RestClient.lambda$submitRequest$1(RestClient.java:430)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:577)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:570)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:549)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:490)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:615)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:608)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> > at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> > at java.lang.Thread.run(Thread.java:748)
> > Caused by: org.apache.flink.runtime.concurrent.FutureUtils$RetryException: Could not complete the operation. Number of retries has been exhausted.
> > at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:386)
> > ... 21 more
> > Caused by: java.util.concurrent.CompletionException: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> > at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
> > at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
> > at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:957)
> > at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
> > ... 19 more
> > Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> > Caused by: java.net.ConnectException: Connection refused
> > at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
> > at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
> > at org.apache.flink.shaded.netty4.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> > at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> > at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> > at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> > at java.lang.Thread.run(Thread.java:748)
> >
> > 请教大佬我该如何解决此问题?
> >
> >
> >
> >
> >
> >
> >
> >
> >
> > 卓
> >
> >
> >
>
Re: Native Kubernetes 模式提交任务异常
Posted by yu'an huang <h....@gmail.com>.
Hi 卓宇,
如exception中提到的:Connection refused: rancher-test.ziroom.com/10.30.238.18:8081。由于session cluster地址不可访问,所以job graph提交失败了。一般原因是Kubernetes集群存在网络隔离,需要适当地设置service才能访问和提交job。
请问你是如何设置你的session cluster的呢,可以访问UI吗?
你的参数kubernetes.rest-service.exposed.type设置的是什么?
你使用Flink的版本又是什么呢?
> On 14 Apr 2022, at 9:25 AM, 陈卓宇 <25...@qq.com.INVALID> wrote:
>
> 按照官方文档进行以下操作:
> # (1) Start Kubernetes session
> $ ./bin/kubernetes-session.sh -Dkubernetes.cluster-id=my-first-flink-cluster
> # (2) Submit example job
> $ ./bin/flink run \
>     --target kubernetes-session \
>     -Dkubernetes.cluster-id=my-first-flink-cluster \
>     ./examples/streaming/TopSpeedWindowing.jar
> 报错:org.apache.flink.client.program.ProgramInvocationException: The main method caused an error: Failed to execute job 'CarTopSpeedWindowingExample'.
> at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:360)
> at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:213)
> at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:114)
> at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:816)
> at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:248)
> at org.apache.flink.client.cli.CliFrontend.parseAndRun(CliFrontend.java:1058)
> at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:1136)
> at org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
> at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:1136)
> Caused by: org.apache.flink.util.FlinkException: Failed to execute job 'CarTopSpeedWindowingExample'.
> at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1918)
> at org.apache.flink.client.program.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:135)
> at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:76)
> at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1782)
> at org.apache.flink.streaming.examples.windowing.TopSpeedWindowing.main(TopSpeedWindowing.java:99)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:343)
> ... 8 more
> Caused by: org.apache.flink.runtime.client.JobSubmissionException: Failed to submit JobGraph.
> at org.apache.flink.client.program.rest.RestClusterClient.lambda$submitJob$7(RestClusterClient.java:400)
> at java.util.concurrent.CompletableFuture.uniExceptionally(CompletableFuture.java:884)
> at java.util.concurrent.CompletableFuture$UniExceptionally.tryFire(CompletableFuture.java:866)
> at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:390)
> at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
> at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
> at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
> at java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:1990)
> at org.apache.flink.runtime.rest.RestClient.lambda$submitRequest$1(RestClient.java:430)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:577)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:570)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:549)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:490)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:615)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:608)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.flink.runtime.concurrent.FutureUtils$RetryException: Could not complete the operation. Number of retries has been exhausted.
> at org.apache.flink.runtime.concurrent.FutureUtils.lambda$retryOperationWithDelay$9(FutureUtils.java:386)
> ... 21 more
> Caused by: java.util.concurrent.CompletionException: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:292)
> at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:308)
> at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:957)
> at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
> ... 19 more
> Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: rancher-test.ziroom.com/10.30.238.18:8081
> Caused by: java.net.ConnectException: Connection refused
> at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
> at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
> at org.apache.flink.shaded.netty4.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:702)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
> at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
> at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
> at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> at java.lang.Thread.run(Thread.java:748)
>
> 请教大佬我该如何解决此问题?
>
>
>
>
>
>
>
>
>
> 卓
>
>
>