You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user-zh@flink.apache.org by "casel.chen" <ca...@126.com> on 2022/11/02 01:41:19 UTC

Remote system has been silent for too long. (more than 48.0 hours)

今天线上 Flink 1.13.2 作业遇到如下报错,请问是何原因,要如何解决?
作业内容是从kafka topic消费canal json数据写到另一个mysql库表


2022-09-17 19:40:03,088 ERROR akka.remote.Remoting                                         [] - Association to [akka.tcp://flink-metrics@172.19.193.15:34101] with UID [-633015504] irrecoverably failed. Quarantining address.

java.util.concurrent.TimeoutException: Remote system has been silent for too long. (more than 48.0 hours)

        at akka.remote.ReliableDeliverySupervisor$$anonfun$idle$1.applyOrElse(Endpoint.scala:387) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.Actor.aroundReceive(Actor.scala:517) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.Actor.aroundReceive$(Actor.scala:515) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.remote.ReliableDeliverySupervisor.aroundReceive(Endpoint.scala:207) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.ActorCell.invoke(ActorCell.scala:561) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.Mailbox.run(Mailbox.scala:225) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.Mailbox.exec(Mailbox.scala:235) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

2022-09-25 17:17:21,581 ERROR akka.remote.Remoting                                         [] - Association to [akka.tcp://flink-metrics@172.19.193.15:38805] with UID [1496738655] irrecoverably failed. Quarantining address.

java.util.concurrent.TimeoutException: Remote system has been silent for too long. (more than 48.0 hours)

        at akka.remote.ReliableDeliverySupervisor$$anonfun$idle$1.applyOrElse(Endpoint.scala:387) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.Actor.aroundReceive(Actor.scala:517) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.Actor.aroundReceive$(Actor.scala:515) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.remote.ReliableDeliverySupervisor.aroundReceive(Endpoint.scala:207) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.actor.ActorCell.invoke(ActorCell.scala:561) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.Mailbox.run(Mailbox.scala:225) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.Mailbox.exec(Mailbox.scala:235) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

        at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) ~[flink-dist_2.12-1.13.2.jar:1.13.2]

Re: Remote system has been silent for too long. (more than 48.0 hours)

Posted by Guo Thompson <gw...@gmail.com>.
感觉是 TM(TaskManager)GC 时间太久导致的。

Weihua Hu <hu...@gmail.com> 于2022年11月2日周三 19:47写道:

> Hi,
> 这种情况一般是这两个 TaskManager 出现故障断开连接了。可以再查看下之前的日志验证下。
>
> Best,
> Weihua
>
>
> On Wed, Nov 2, 2022 at 9:41 AM casel.chen <ca...@126.com> wrote:
>
> > 今天线上 Flink 1.13.2 作业遇到如下报错,请问是何原因,要如何解决?
> > 作业内容是从kafka topic消费canal json数据写到另一个mysql库表
> >
> >
> > 2022-09-17 19:40:03,088 ERROR akka.remote.Remoting [] - Association to
> > [akka.tcp://flink-metrics@172.19.193.15:34101] with UID [-633015504] irrecoverably
> > failed. Quarantining address.
> >
> > java.util.concurrent.TimeoutException: Remote system has been silent for
> > too long. (more than 48.0 hours)
> >
> >         at
> > akka.remote.ReliableDeliverySupervisor$$anonfun$idle$1.applyOrElse(Endpoint.scala:387)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.Actor.aroundReceive(Actor.scala:517)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.Actor.aroundReceive$(Actor.scala:515)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.remote.ReliableDeliverySupervisor.aroundReceive(Endpoint.scala:207)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> > 2022-09-25 17:17:21,581 ERROR akka.remote.Remoting [] - Association to
> > [akka.tcp://flink-metrics@172.19.193.15:38805] with UID [1496738655] irrecoverably
> > failed. Quarantining address.
> >
> > java.util.concurrent.TimeoutException: Remote system has been silent for
> > too long. (more than 48.0 hours)
> >
> >         at
> > akka.remote.ReliableDeliverySupervisor$$anonfun$idle$1.applyOrElse(Endpoint.scala:387)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.Actor.aroundReceive(Actor.scala:517)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.Actor.aroundReceive$(Actor.scala:515)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.remote.ReliableDeliverySupervisor.aroundReceive(Endpoint.scala:207)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
> >
> >         at
> > akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> > ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>

Re: Remote system has been silent for too long. (more than 48.0 hours)

Posted by Weihua Hu <hu...@gmail.com>.
Hi,
这种情况一般是这两个 TaskManager 出现故障断开连接了。可以再查看下之前的日志验证下。

Best,
Weihua


On Wed, Nov 2, 2022 at 9:41 AM casel.chen <ca...@126.com> wrote:

> 今天线上 Flink 1.13.2 作业遇到如下报错,请问是何原因,要如何解决?
> 作业内容是从kafka topic消费canal json数据写到另一个mysql库表
>
>
> 2022-09-17 19:40:03,088 ERROR akka.remote.Remoting [] - Association to
> [akka.tcp://flink-metrics@172.19.193.15:34101] with UID [-633015504] irrecoverably
> failed. Quarantining address.
>
> java.util.concurrent.TimeoutException: Remote system has been silent for
> too long. (more than 48.0 hours)
>
>         at
> akka.remote.ReliableDeliverySupervisor$$anonfun$idle$1.applyOrElse(Endpoint.scala:387)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.Actor.aroundReceive(Actor.scala:517)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.Actor.aroundReceive$(Actor.scala:515)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.remote.ReliableDeliverySupervisor.aroundReceive(Endpoint.scala:207)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
> 2022-09-25 17:17:21,581 ERROR akka.remote.Remoting [] - Association to
> [akka.tcp://flink-metrics@172.19.193.15:38805] with UID [1496738655] irrecoverably
> failed. Quarantining address.
>
> java.util.concurrent.TimeoutException: Remote system has been silent for
> too long. (more than 48.0 hours)
>
>         at
> akka.remote.ReliableDeliverySupervisor$$anonfun$idle$1.applyOrElse(Endpoint.scala:387)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.Actor.aroundReceive(Actor.scala:517)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.Actor.aroundReceive$(Actor.scala:515)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.remote.ReliableDeliverySupervisor.aroundReceive(Endpoint.scala:207)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.actor.ActorCell.invoke(ActorCell.scala:561)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.dispatch.Mailbox.run(Mailbox.scala:225)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]
>
>         at
> akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> ~[flink-dist_2.12-1.13.2.jar:1.13.2]