You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by GitBox <gi...@apache.org> on 2021/01/21 18:51:07 UTC

[GitHub] [flink] rkhachatryan commented on a change in pull request #14721: [FLINK-20645][network] Fix corrupted unaligned checkpoints.

rkhachatryan commented on a change in pull request #14721:
URL: https://github.com/apache/flink/pull/14721#discussion_r562029491



##########
File path: flink-runtime/src/main/java/org/apache/flink/runtime/io/network/logger/NetworkActionsLogger.java
##########
@@ -18,51 +18,89 @@
 
 package org.apache.flink.runtime.io.network.logger;
 
+import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo;
+import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo;
 import org.apache.flink.runtime.io.network.buffer.Buffer;
 import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
+import org.apache.flink.runtime.io.network.partition.consumer.ChannelStatePersister;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.Arrays;
-
-import static org.apache.flink.util.Preconditions.checkState;
-
 /** Utility class for logging actions that happened in the network stack for debugging purposes. */
 public class NetworkActionsLogger {
     private static final Logger LOG = LoggerFactory.getLogger(NetworkActionsLogger.class);
-
     private static final boolean ENABLED = LOG.isTraceEnabled();
     private static final boolean INCLUDE_HASH = true;
 
-    public static void log(Class<?> clazz, String action, Buffer buffer) {
+    public static void traceInput(
+            Class<?> clazz,
+            String action,
+            Buffer buffer,
+            InputChannelInfo channelInfo,
+            ChannelStatePersister channelStatePersister,
+            int sequenceNumber) {
+        if (ENABLED) {
+            LOG.trace(
+                    "{}#{} {}, seq {}, {} @ {}",
+                    clazz.getSimpleName(),

Review comment:
       This call has some overhead, as does `getClass()` (although this code shouldn't execute in prod, it can affect debugging).
   How about using a string constant (in the caller) instead?

##########
File path: flink-runtime/src/main/java/org/apache/flink/runtime/io/network/logger/NetworkActionsLogger.java
##########
@@ -18,51 +18,89 @@
 
 package org.apache.flink.runtime.io.network.logger;
 
+import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo;
+import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo;
 import org.apache.flink.runtime.io.network.buffer.Buffer;
 import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
+import org.apache.flink.runtime.io.network.partition.consumer.ChannelStatePersister;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.Arrays;
-
-import static org.apache.flink.util.Preconditions.checkState;
-
 /** Utility class for logging actions that happened in the network stack for debugging purposes. */
 public class NetworkActionsLogger {
     private static final Logger LOG = LoggerFactory.getLogger(NetworkActionsLogger.class);
-
     private static final boolean ENABLED = LOG.isTraceEnabled();
     private static final boolean INCLUDE_HASH = true;
 
-    public static void log(Class<?> clazz, String action, Buffer buffer) {
+    public static void traceInput(
+            Class<?> clazz,
+            String action,
+            Buffer buffer,
+            InputChannelInfo channelInfo,
+            ChannelStatePersister channelStatePersister,
+            int sequenceNumber) {
+        if (ENABLED) {
+            LOG.trace(
+                    "{}#{} {}, seq {}, {} @ {}",
+                    clazz.getSimpleName(),
+                    action,
+                    buffer.toDebugString(INCLUDE_HASH),
+                    sequenceNumber,
+                    channelStatePersister,
+                    channelInfo);
+        }
+    }
+
+    public static void traceOutput(
+            Class<?> clazz, String action, Buffer buffer, ResultSubpartitionInfo channelInfo) {
+        if (ENABLED) {
+            LOG.trace(
+                    "{}#{} {} @ {}",
+                    clazz.getSimpleName(),
+                    action,
+                    buffer.toDebugString(INCLUDE_HASH),
+                    channelInfo);
+        }
+    }
+
+    public static void traceRecover(
+            Class<?> clazz, String action, Buffer buffer, InputChannelInfo channelInfo) {
         if (ENABLED) {
-            LOG.trace("{}#{} buffer = [{}]", clazz.getSimpleName(), action, toPrettyString(buffer));
+            LOG.trace(
+                    "{}#{} {} @ {}",
+                    clazz.getSimpleName(),
+                    action,
+                    buffer.toDebugString(INCLUDE_HASH),
+                    channelInfo);
         }
     }
 
-    public static void log(Class<?> clazz, String action, BufferConsumer bufferConsumer) {
+    public static void traceRecover(
+            Class<?> clazz,
+            String action,
+            BufferConsumer bufferConsumer,
+            ResultSubpartitionInfo channelInfo) {
         if (ENABLED) {
-            Buffer buffer = null;
-            try (BufferConsumer copiedBufferConsumer = bufferConsumer.copy()) {
-                buffer = copiedBufferConsumer.build();
-                log(clazz, action, buffer);
-                checkState(copiedBufferConsumer.isFinished());
-            } finally {
-                if (buffer != null) {
-                    buffer.recycleBuffer();
-                }
-            }
+            LOG.trace(
+                    "{}#{} {} @ {}",
+                    clazz.getSimpleName(),
+                    action,
+                    bufferConsumer.toDebugString(INCLUDE_HASH),
+                    channelInfo);
         }
     }
 
-    private static String toPrettyString(Buffer buffer) {
-        StringBuilder prettyString = new StringBuilder("size=").append(buffer.getSize());
-        if (INCLUDE_HASH) {
-            byte[] bytes = new byte[buffer.getSize()];
-            buffer.readOnlySlice().asByteBuf().readBytes(bytes);
-            prettyString.append(", hash=").append(Arrays.hashCode(bytes));
+    public static void traceWrite(

Review comment:
       nit: `tracePersist`?

##########
File path: flink-runtime/src/main/java/org/apache/flink/runtime/io/network/partition/consumer/ChannelStatePersister.java
##########
@@ -62,9 +68,18 @@
     }
 
     protected void startPersisting(long barrierId, List<Buffer> knownBuffers) {
+        LOG.debug(

Review comment:
       Does it make sense to log `channelInfo`?
   
   nit: extract log method?

##########
File path: flink-runtime/src/main/java/org/apache/flink/runtime/io/network/partition/consumer/ChannelStatePersister.java
##########
@@ -95,16 +115,39 @@ protected void maybePersist(Buffer buffer) {
     protected Optional<Long> checkForBarrier(Buffer buffer) throws IOException {
         final AbstractEvent event = parseEvent(buffer);
         if (event instanceof CheckpointBarrier) {
-            if (((CheckpointBarrier) event).getId() >= lastSeenBarrier) {
+            final long barrierId = ((CheckpointBarrier) event).getId();
+            long expectedBarrierId =
+                    checkpointStatus == CheckpointStatus.COMPLETED
+                            ? lastSeenBarrier + 1

Review comment:
       Good catch! :1st_place_medal: :)

##########
File path: flink-runtime/src/main/java/org/apache/flink/runtime/io/network/logger/NetworkActionsLogger.java
##########
@@ -18,51 +18,89 @@
 
 package org.apache.flink.runtime.io.network.logger;
 
+import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo;
+import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo;
 import org.apache.flink.runtime.io.network.buffer.Buffer;
 import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
+import org.apache.flink.runtime.io.network.partition.consumer.ChannelStatePersister;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.Arrays;
-
-import static org.apache.flink.util.Preconditions.checkState;
-
 /** Utility class for logging actions that happened in the network stack for debugging purposes. */
 public class NetworkActionsLogger {
     private static final Logger LOG = LoggerFactory.getLogger(NetworkActionsLogger.class);
-
     private static final boolean ENABLED = LOG.isTraceEnabled();
     private static final boolean INCLUDE_HASH = true;
 
-    public static void log(Class<?> clazz, String action, Buffer buffer) {
+    public static void traceInput(
+            Class<?> clazz,
+            String action,
+            Buffer buffer,
+            InputChannelInfo channelInfo,
+            ChannelStatePersister channelStatePersister,
+            int sequenceNumber) {
+        if (ENABLED) {
+            LOG.trace(
+                    "{}#{} {}, seq {}, {} @ {}",
+                    clazz.getSimpleName(),
+                    action,

Review comment:
       nit: I think `action` is not needed if we have method per action.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org