You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by mx...@apache.org on 2016/10/18 14:05:32 UTC
[2/2] flink git commit: [FLINK-4829] snapshot accumulators on a
best-effort basis
[FLINK-4829] snapshot accumulators on a best-effort basis
Heartbeats should not fail when accumulators cannot be snapshotted. Instead,
we should simply skip reporting the accumulators that failed. Eventually, the
accumulators will be reported; at the latest, when the job finishes.
This closes #2649
Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/210230c4
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/210230c4
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/210230c4
Branch: refs/heads/release-1.1
Commit: 210230c4ab44b84c28b9a62ff461de0955e67f8f
Parents: c1d6b24
Author: Maximilian Michels <mx...@apache.org>
Authored: Mon Oct 17 14:19:00 2016 +0200
Committer: Maximilian Michels <mx...@apache.org>
Committed: Tue Oct 18 16:05:05 2016 +0200
----------------------------------------------------------------------
.../apache/flink/runtime/taskmanager/TaskManager.scala | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/flink/blob/210230c4/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala b/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
index 8e787bb..dc2780b 100644
--- a/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
+++ b/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
@@ -35,6 +35,7 @@ import com.codahale.metrics.jvm.{BufferPoolMetricSet, GarbageCollectorMetricSet,
import com.codahale.metrics.{Gauge, MetricFilter, MetricRegistry}
import com.fasterxml.jackson.databind.ObjectMapper
import grizzled.slf4j.Logger
+import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.flink.configuration._
import org.apache.flink.core.fs.FileSystem
import org.apache.flink.core.memory.{HeapMemorySegment, HybridMemorySegment, MemorySegmentFactory, MemoryType}
@@ -1282,9 +1283,15 @@ class TaskManager(
runningTasks.asScala foreach {
case (execID, task) =>
- val registry = task.getAccumulatorRegistry
- val accumulators = registry.getSnapshot
- accumulatorEvents.append(accumulators)
+ try {
+ val registry = task.getAccumulatorRegistry
+ val accumulators = registry.getSnapshot
+ accumulatorEvents.append(accumulators)
+ } catch {
+ case e: Exception =>
+ log.warn("Failed to take accumulator snapshot for task {}.",
+ execID, ExceptionUtils.getRootCause(e))
+ }
}
currentJobManager foreach {