You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by um...@apache.org on 2022/05/17 15:25:28 UTC

[ozone] branch master updated: HDDS-6660: EC: Add the DN side Reconstruction Handler class. (#3399)

This is an automated email from the ASF dual-hosted git repository.

umamahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 44cf924f4d HDDS-6660: EC: Add the DN side Reconstruction Handler class. (#3399)
44cf924f4d is described below

commit 44cf924f4db76bcb10bc543f1ce79fecac64676f
Author: Uma Maheswara Rao G <um...@apache.org>
AuthorDate: Tue May 17 08:25:23 2022 -0700

    HDDS-6660: EC: Add the DN side Reconstruction Handler class. (#3399)
---
 .../common/statemachine/DatanodeStateMachine.java  |  9 +++
 .../ReconstructECContainersCommandHandler.java     | 78 ++++++++++++++++++++
 .../ECReconstructionCommandInfo.java               | 82 ++++++++++++++++++++++
 .../ECReconstructionCoordinatorTask.java           | 52 ++++++++++++++
 .../reconstruction/ECReconstructionSupervisor.java | 72 +++++++++++++++++++
 .../container/ec/reconstruction/package-info.java  | 18 +++++
 .../commands/ReconstructECContainersCommand.java   |  5 +-
 .../TestECReconstructionSupervisor.java            | 53 ++++++++++++++
 8 files changed, 368 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index c09a2bf721..0089522d01 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -49,9 +49,11 @@ import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.Crea
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.DeleteBlocksCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.DeleteContainerCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.FinalizeNewLayoutVersionCommandHandler;
+import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.ReconstructECContainersCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.RefreshVolumeUsageCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.ReplicateContainerCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.SetNodeOperationalStateCommandHandler;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionSupervisor;
 import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.container.replication.ContainerReplicator;
@@ -96,6 +98,7 @@ public class DatanodeStateMachine implements Closeable {
   private volatile Thread stateMachineThread = null;
   private Thread cmdProcessThread = null;
   private final ReplicationSupervisor supervisor;
+  private final ECReconstructionSupervisor ecReconstructionSupervisor;
 
   private JvmPauseMonitor jvmPauseMonitor;
   private CertificateClient dnCertClient;
@@ -178,6 +181,10 @@ public class DatanodeStateMachine implements Closeable {
     replicationSupervisorMetrics =
         ReplicationSupervisorMetrics.create(supervisor);
 
+    ecReconstructionSupervisor =
+        new ECReconstructionSupervisor(container.getContainerSet(), context,
+            replicationConfig.getReplicationMaxStreams());
+
 
     // When we add new handlers just adding a new handler here should do the
     // trick.
@@ -187,6 +194,8 @@ public class DatanodeStateMachine implements Closeable {
             conf, dnConf.getBlockDeleteThreads(),
             dnConf.getBlockDeleteQueueLimit()))
         .addHandler(new ReplicateContainerCommandHandler(conf, supervisor))
+        .addHandler(new ReconstructECContainersCommandHandler(conf,
+            ecReconstructionSupervisor))
         .addHandler(new DeleteContainerCommandHandler(
             dnConf.getContainerDeleteThreads()))
         .addHandler(new ClosePipelineCommandHandler())
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
new file mode 100644
index 0000000000..f4ec45f600
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.common.statemachine.commandhandler;
+
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
+import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
+import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCommandInfo;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCoordinatorTask;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionSupervisor;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+
+/**
+ * Command handler that hands lost EC containers over for reconstruction.
+ */
+public class ReconstructECContainersCommandHandler implements CommandHandler {
+
+  private final ECReconstructionSupervisor supervisor;
+  private final ConfigurationSource conf;
+
+  public ReconstructECContainersCommandHandler(ConfigurationSource conf,
+      ECReconstructionSupervisor supervisor) {
+    this.conf = conf;
+    this.supervisor = supervisor;
+  }
+
+  @Override
+  public void handle(SCMCommand command, OzoneContainer container,
+      StateContext context, SCMConnectionManager connectionManager) {
+    // Only the command is read here; the other arguments are not used.
+    ReconstructECContainersCommand ecContainersCommand =
+        (ReconstructECContainersCommand) command;
+    // Wrap the command details in a task and pass it to the supervisor.
+    supervisor.addTask(new ECReconstructionCoordinatorTask(
+        new ECReconstructionCommandInfo(ecContainersCommand.getContainerID(),
+            ecContainersCommand.getEcReplicationConfig(),
+            ecContainersCommand.getMissingContainerIndexes(),
+            ecContainersCommand.getSources(),
+            ecContainersCommand.getTargetDatanodes())));
+  }
+
+  @Override
+  public Type getCommandType() {
+    return Type.reconstructECContainersCommand;
+  }
+
+  @Override
+  public int getInvocationCount() {
+    return 0; // Invocations are not tracked by this handler.
+  }
+
+  @Override
+  public long getAverageRunTime() {
+    return 0; // Run times are not tracked by this handler.
+  }
+
+  public ConfigurationSource getConf() {
+    return conf;
+  }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java
new file mode 100644
index 0000000000..c95f9646f8
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
+import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Keeps the information required for one EC reconstruction, as carried by a
+ * {@link ReconstructECContainersCommand}.
+ */
+public class ECReconstructionCommandInfo {
+  private final long containerID;
+  private final ECReplicationConfig ecReplicationConfig;
+  private final byte[] missingContainerIndexes;
+  private final List<DatanodeDetailsAndReplicaIndex> sources;
+  private final List<DatanodeDetails> targetDatanodes;
+
+  public ECReconstructionCommandInfo(long containerID,
+      ECReplicationConfig ecReplicationConfig, byte[] missingContainerIndexes,
+      List<DatanodeDetailsAndReplicaIndex> sources,
+      List<DatanodeDetails> targetDatanodes) {
+    // The array is copied so later changes by the caller are not seen here.
+    this.containerID = containerID;
+    this.ecReplicationConfig = ecReplicationConfig;
+    this.missingContainerIndexes = missingContainerIndexes.clone();
+    this.sources = sources;
+    this.targetDatanodes = targetDatanodes;
+  }
+
+  public long getContainerID() {
+    return containerID;
+  }
+
+  public byte[] getMissingContainerIndexes() {
+    // Return a copy so callers cannot modify the internal array.
+    return missingContainerIndexes.clone();
+  }
+
+  public ECReplicationConfig getEcReplicationConfig() {
+    return ecReplicationConfig;
+  }
+
+  public List<DatanodeDetailsAndReplicaIndex> getSources() {
+    return sources;
+  }
+
+  public List<DatanodeDetails> getTargetDatanodes() {
+    return targetDatanodes;
+  }
+
+  @Override
+  public String toString() {
+    return "ECReconstructionCommandInfo{"
+        + "containerID=" + containerID
+        + ", ecReplicationConfig=" + ecReplicationConfig
+        + ", missingContainerIndexes="
+        + Arrays.toString(missingContainerIndexes)
+        + ", sources=" + sources
+        + ", targetDatanodes=" + targetDatanodes + '}';
+  }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
new file mode 100644
index 0000000000..24168e5f69
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+/**
+ * Task that coordinates the reconstruction described by one command info.
+ */
+public class ECReconstructionCoordinatorTask implements Runnable {
+  private final ECReconstructionCommandInfo reconstructionCommandInfo;
+
+  public ECReconstructionCoordinatorTask(
+      ECReconstructionCommandInfo reconstructionCommandInfo) {
+    this.reconstructionCommandInfo = reconstructionCommandInfo;
+  }
+
+  @Override
+  public void run() {
+    // TODO: Implement the coordinator logic to handle a container group
+    // reconstruction. The planned steps are:
+
+    // 1. Read the container block meta info from the minimum required set of
+    // good containers. (The full block set should be available from the 1st
+    // index or the parity index containers.)
+    // 2. Find out the total number of blocks.
+    // 3. Loop over each block and recover it using the
+    // ReconstructedInputStreams (HDDS-6665).
+    // 4. Write the recovered chunks to the given targets, or write them
+    // locally to the respective container (HDDS-6582).
+    // 5. Close/finalize the recovered containers.
+  }
+
+  @Override
+  public String toString() {
+    return "ECReconstructionCoordinatorTask{reconstructionCommandInfo="
+        + reconstructionCommandInfo + '}';
+  }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java
new file mode 100644
index 0000000000..e2c930a8c2
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Runs the EC reconstruction tasks on an executor as they arrive, and shuts
+ * that executor down on {@link #stop()}.
+ */
+public class ECReconstructionSupervisor {
+
+  private final ContainerSet containerSet;
+  private final StateContext context;
+  private final ExecutorService executor;
+
+  public ECReconstructionSupervisor(ContainerSet containerSet,
+      StateContext context, ExecutorService executor) {
+    this.containerSet = containerSet;
+    this.context = context;
+    this.executor = executor;
+  }
+
+  public ECReconstructionSupervisor(ContainerSet containerSet,
+      StateContext context, int poolSize) {
+    // TODO: ReplicationSupervisor and this class can be refactored to have a
+    //  common interface.
+    this(containerSet, context,
+        new ThreadPoolExecutor(poolSize, poolSize, 60, TimeUnit.SECONDS,
+            new LinkedBlockingQueue<>(),
+            new ThreadFactoryBuilder().setDaemon(true)
+                .setNameFormat("ECContainerReconstructionThread-%d").build()));
+  }
+
+  public void stop() {
+    try {
+      executor.shutdown();
+      if (!executor.awaitTermination(3, TimeUnit.SECONDS)) {
+        executor.shutdownNow();
+      }
+    } catch (InterruptedException ie) {
+      executor.shutdownNow(); // Interrupted while waiting: cancel the rest.
+      Thread.currentThread().interrupt();
+    }
+  }
+
+  public void addTask(ECReconstructionCoordinatorTask task) {
+    executor.execute(task);
+  }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/package-info.java
new file mode 100644
index 0000000000..61f4f5c36b
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/package-info.java
@@ -0,0 +1,18 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
\ No newline at end of file
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java
index 17c8a4f3f7..c63c96fa05 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java
@@ -135,7 +135,10 @@ public class ReconstructECContainersCommand
     return ecReplicationConfig;
   }
 
-  static class DatanodeDetailsAndReplicaIndex {
+  /**
+   * To store the datanode details with replica index.
+   */
+  public static class DatanodeDetailsAndReplicaIndex {
     private DatanodeDetails dnDetails;
     private int replicaIndex;
 
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java
new file mode 100644
index 0000000000..e86be82a09
--- /dev/null
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import org.apache.ozone.test.GenericTestUtils;
+import org.junit.Test;
+
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Tests that ECReconstructionSupervisor executes the tasks added to it.
+ */
+public class TestECReconstructionSupervisor {
+
+  private final ECReconstructionSupervisor supervisor =
+      new ECReconstructionSupervisor(null, null, 5);
+
+  @Test
+  public void testAddTaskShouldExecuteTheGivenTask()
+      throws InterruptedException, TimeoutException {
+    FakeTask task = new FakeTask(null);
+    supervisor.addTask(task);
+    GenericTestUtils.waitFor(() -> task.isExecuted, 100, 15000);
+  }
+
+  static class FakeTask extends ECReconstructionCoordinatorTask {
+    private volatile boolean isExecuted = false; // Written by a pool thread.
+
+    FakeTask(ECReconstructionCommandInfo reconstructionCommandInfo) {
+      super(reconstructionCommandInfo);
+    }
+
+    @Override
+    public void run() {
+      isExecuted = true;
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org