You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by ga...@apache.org on 2022/10/19 03:37:36 UTC

[incubator-seatunnel] branch dev updated: [Engine] [Checkpoint] fix Checkpoint can't deserialize with protostuff (#3131)

This is an automated email from the ASF dual-hosted git repository.

gaojun2048 pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new 4cecedbb8 [Engine] [Checkpoint] fix Checkpoint can't deserialize with protostuff (#3131)
4cecedbb8 is described below

commit 4cecedbb87fc53ad39fc0631e57b92ce60fb6c63
Author: Hisoka <fa...@qq.com>
AuthorDate: Wed Oct 19 11:37:31 2022 +0800

    [Engine] [Checkpoint] fix Checkpoint can't deserialize with protostuff (#3131)
---
 .../engine/server/checkpoint/ActionState.java      | 10 +--
 .../server/checkpoint/CheckpointCoordinator.java   |  2 +-
 .../engine/server/checkpoint/TaskStatistics.java   | 14 +++--
 .../engine/server/checkpoint/StorageTest.java      | 73 ++++++++++++++++++++++
 4 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/ActionState.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/ActionState.java
index e91e018f7..c90c9e769 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/ActionState.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/ActionState.java
@@ -18,6 +18,8 @@
 package org.apache.seatunnel.engine.server.checkpoint;
 
 import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
 
 public class ActionState implements Serializable {
 
@@ -31,7 +33,7 @@ public class ActionState implements Serializable {
     /**
      * The handles to states created by the parallel actions: action index -> action state.
      */
-    private final ActionSubtaskState[] subtaskStates;
+    private final List<ActionSubtaskState> subtaskStates;
 
     private ActionSubtaskState coordinatorState;
 
@@ -42,7 +44,7 @@ public class ActionState implements Serializable {
 
     public ActionState(String actionId, int parallelism) {
         this.actionId = actionId;
-        this.subtaskStates = new ActionSubtaskState[parallelism];
+        this.subtaskStates = Arrays.asList(new ActionSubtaskState[parallelism]);
         this.parallelism = parallelism;
     }
 
@@ -50,7 +52,7 @@ public class ActionState implements Serializable {
         return actionId;
     }
 
-    public ActionSubtaskState[] getSubtaskStates() {
+    public List<ActionSubtaskState> getSubtaskStates() {
         return subtaskStates;
     }
 
@@ -67,6 +69,6 @@ public class ActionState implements Serializable {
             coordinatorState = state;
             return;
         }
-        subtaskStates[index] = state;
+        subtaskStates.set(index, state);
     }
 }
diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
index 557c365b9..a2dab3d48 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
@@ -192,7 +192,7 @@ public class CheckpointCoordinator {
                         return;
                     }
                     for (int i = tuple.f1(); i < actionState.getParallelism(); i += currentParallelism) {
-                        states.add(actionState.getSubtaskStates()[i]);
+                        states.add(actionState.getSubtaskStates().get(i));
                     }
                 });
         }
diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java
index 3b74a6903..a38dc37f7 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java
@@ -21,6 +21,8 @@ import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
 
 public class TaskStatistics implements Serializable {
     /**
@@ -28,7 +30,7 @@ public class TaskStatistics implements Serializable {
      */
     private final Long jobVertexId;
 
-    private final SubtaskStatistics[] subtaskStats;
+    private final List<SubtaskStatistics> subtaskStats;
 
     /**
      * Marks whether a subtask is complete;
@@ -42,7 +44,7 @@ public class TaskStatistics implements Serializable {
     TaskStatistics(Long jobVertexId, int parallelism) {
         this.jobVertexId = checkNotNull(jobVertexId, "JobVertexID");
         checkArgument(parallelism > 0, "the parallelism of task <= 0");
-        this.subtaskStats = new SubtaskStatistics[parallelism];
+        this.subtaskStats = Arrays.asList(new SubtaskStatistics[parallelism]);
         this.subtaskCompleted = new boolean[parallelism];
     }
 
@@ -50,12 +52,12 @@ public class TaskStatistics implements Serializable {
         checkNotNull(subtask, "Subtask stats");
         int subtaskIndex = subtask.getSubtaskIndex();
 
-        if (subtaskIndex < 0 || subtaskIndex >= subtaskStats.length) {
+        if (subtaskIndex < 0 || subtaskIndex >= subtaskStats.size()) {
             return false;
         }
 
-        if (subtaskStats[subtaskIndex] == null) {
-            subtaskStats[subtaskIndex] = subtask;
+        if (subtaskStats.get(subtaskIndex) == null) {
+            subtaskStats.set(subtaskIndex, subtask);
             numAcknowledgedSubtasks++;
             return true;
         } else {
@@ -85,7 +87,7 @@ public class TaskStatistics implements Serializable {
         return jobVertexId;
     }
 
-    public SubtaskStatistics[] getSubtaskStats() {
+    public List<SubtaskStatistics> getSubtaskStats() {
         return subtaskStats;
     }
 
diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/StorageTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/StorageTest.java
new file mode 100644
index 000000000..f06d1d5bd
--- /dev/null
+++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/StorageTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.engine.server.checkpoint;
+
+import org.apache.seatunnel.engine.checkpoint.storage.PipelineState;
+import org.apache.seatunnel.engine.checkpoint.storage.common.ProtoStuffSerializer;
+import org.apache.seatunnel.engine.core.checkpoint.CheckpointType;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
+
+public class StorageTest {
+    /** Round-trips a CompletedCheckpoint through ProtoStuffSerializer and a local file. */
+    @Test
+    public void localFileTest() throws IOException {
+        // Fixture: one TaskStatistics (parallelism 32) and one ActionState (parallelism 13).
+        Map<Long, TaskStatistics> taskStatisticsMap = new HashMap<>();
+        taskStatisticsMap.put(1L, new TaskStatistics(1L, 32));
+        Map<Long, ActionState> actionStateMap = new HashMap<>();
+        actionStateMap.put(2L, new ActionState("test", 13));
+        CompletedCheckpoint completedCheckpoint = new CompletedCheckpoint(1, 2, 4324,
+            Instant.now().toEpochMilli(),
+            CheckpointType.COMPLETED_POINT_TYPE,
+            Instant.now().toEpochMilli(),
+            actionStateMap,
+            taskStatisticsMap);
+        // Serialize the checkpoint, then embed the bytes in a PipelineState as its states.
+        ProtoStuffSerializer protoStuffSerializer = new ProtoStuffSerializer();
+        byte[] data = protoStuffSerializer.serialize(completedCheckpoint);
+        PipelineState pipelineState = PipelineState.builder()
+            .checkpointId(1)
+            .jobId(String.valueOf(1))
+            .pipelineId(1)
+            .states(data)
+            .build();
+        // Serialize the PipelineState wrapper as well; these are the bytes stored on disk.
+        byte[] pipeData = protoStuffSerializer.serialize(pipelineState);
+        // NOTE(review): fixed path; nothing in this test deletes the file afterwards.
+        File file = new File("/tmp/seatunnel/test.data");
+        // Write the bytes to disk, then read the same file back.
+        FileUtils.writeByteArrayToFile(file, pipeData);
+
+        byte[] fileData = FileUtils.readFileToByteArray(file);
+        // Deserialize the outer PipelineState first, then the checkpoint bytes it carries.
+        PipelineState state = protoStuffSerializer.deserialize(fileData, PipelineState.class);
+        // Only non-null is asserted below; the restored fields are not compared.
+        CompletedCheckpoint checkpoint = new ProtoStuffSerializer().deserialize(state.getStates(), CompletedCheckpoint.class);
+        Assertions.assertNotNull(checkpoint);
+    }
+
+}