You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by um...@apache.org on 2022/05/17 15:25:28 UTC
[ozone] branch master updated: HDDS-6660: EC: Add the DN side Reconstruction Handler class. (#3399)
This is an automated email from the ASF dual-hosted git repository.
umamahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 44cf924f4d HDDS-6660: EC: Add the DN side Reconstruction Handler class. (#3399)
44cf924f4d is described below
commit 44cf924f4db76bcb10bc543f1ce79fecac64676f
Author: Uma Maheswara Rao G <um...@apache.org>
AuthorDate: Tue May 17 08:25:23 2022 -0700
HDDS-6660: EC: Add the DN side Reconstruction Handler class. (#3399)
---
.../common/statemachine/DatanodeStateMachine.java | 9 +++
.../ReconstructECContainersCommandHandler.java | 78 ++++++++++++++++++++
.../ECReconstructionCommandInfo.java | 82 ++++++++++++++++++++++
.../ECReconstructionCoordinatorTask.java | 52 ++++++++++++++
.../reconstruction/ECReconstructionSupervisor.java | 72 +++++++++++++++++++
.../container/ec/reconstruction/package-info.java | 18 +++++
.../commands/ReconstructECContainersCommand.java | 5 +-
.../TestECReconstructionSupervisor.java | 53 ++++++++++++++
8 files changed, 368 insertions(+), 1 deletion(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index c09a2bf721..0089522d01 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -49,9 +49,11 @@ import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.Crea
import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.DeleteBlocksCommandHandler;
import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.DeleteContainerCommandHandler;
import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.FinalizeNewLayoutVersionCommandHandler;
+import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.ReconstructECContainersCommandHandler;
import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.RefreshVolumeUsageCommandHandler;
import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.ReplicateContainerCommandHandler;
import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.SetNodeOperationalStateCommandHandler;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionSupervisor;
import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker;
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
import org.apache.hadoop.ozone.container.replication.ContainerReplicator;
@@ -96,6 +98,7 @@ public class DatanodeStateMachine implements Closeable {
private volatile Thread stateMachineThread = null;
private Thread cmdProcessThread = null;
private final ReplicationSupervisor supervisor;
+ private final ECReconstructionSupervisor ecReconstructionSupervisor;
private JvmPauseMonitor jvmPauseMonitor;
private CertificateClient dnCertClient;
@@ -178,6 +181,10 @@ public class DatanodeStateMachine implements Closeable {
replicationSupervisorMetrics =
ReplicationSupervisorMetrics.create(supervisor);
+ ecReconstructionSupervisor =
+ new ECReconstructionSupervisor(container.getContainerSet(), context,
+ replicationConfig.getReplicationMaxStreams());
+
// When we add new handlers just adding a new handler here should do the
// trick.
@@ -187,6 +194,8 @@ public class DatanodeStateMachine implements Closeable {
conf, dnConf.getBlockDeleteThreads(),
dnConf.getBlockDeleteQueueLimit()))
.addHandler(new ReplicateContainerCommandHandler(conf, supervisor))
+ .addHandler(new ReconstructECContainersCommandHandler(conf,
+ ecReconstructionSupervisor))
.addHandler(new DeleteContainerCommandHandler(
dnConf.getContainerDeleteThreads()))
.addHandler(new ClosePipelineCommandHandler())
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
new file mode 100644
index 0000000000..f4ec45f600
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.common.statemachine.commandhandler;
+
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
+import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
+import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCommandInfo;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCoordinatorTask;
+import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionSupervisor;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+
+/** Command handler for reconstructing the lost EC containers.
+ * Each command becomes a coordinator task that is handed to the supervisor.
+ */
+public class ReconstructECContainersCommandHandler implements CommandHandler {
+
+ private final ECReconstructionSupervisor supervisor;
+ private final ConfigurationSource conf;
+
+ public ReconstructECContainersCommandHandler(ConfigurationSource conf,
+ ECReconstructionSupervisor supervisor) {
+ this.conf = conf;
+ this.supervisor = supervisor;
+ }
+
+ @Override
+ public void handle(SCMCommand command, OzoneContainer container,
+ StateContext context, SCMConnectionManager connectionManager) {
+ ReconstructECContainersCommand ecContainersCommand =
+ (ReconstructECContainersCommand) command;
+ ECReconstructionCommandInfo reconstructionCommandInfo =
+ new ECReconstructionCommandInfo(ecContainersCommand.getContainerID(),
+ ecContainersCommand.getEcReplicationConfig(),
+ ecContainersCommand.getMissingContainerIndexes(),
+ ecContainersCommand.getSources(),
+ ecContainersCommand.getTargetDatanodes());
+ this.supervisor.addTask( // only queues the task; the work runs later
+ new ECReconstructionCoordinatorTask(reconstructionCommandInfo));
+ }
+
+ @Override
+ public Type getCommandType() {
+ return Type.reconstructECContainersCommand;
+ }
+
+ @Override
+ public int getInvocationCount() {
+ return 0; // invocations are not tracked by this handler
+ }
+
+ @Override
+ public long getAverageRunTime() {
+ return 0; // run time is not measured by this handler
+ }
+
+ public ConfigurationSource getConf() {
+ return conf;
+ }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java
new file mode 100644
index 0000000000..c95f9646f8
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
+import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** Holds the parameters of one EC reconstruction command. The index array
+ * is copied on the way in and out; the two lists are stored as passed.
+ */
+public class ECReconstructionCommandInfo {
+ private final long containerID;
+ private final ECReplicationConfig ecReplicationConfig;
+ private final byte[] missingContainerIndexes;
+ private final List<ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex>
+ sources;
+ private final List<DatanodeDetails> targetDatanodes;
+
+ public ECReconstructionCommandInfo(long containerID,
+ ECReplicationConfig ecReplicationConfig, byte[] missingContainerIndexes,
+ List<DatanodeDetailsAndReplicaIndex> sources,
+ List<DatanodeDetails> targetDatanodes) {
+ this.containerID = containerID;
+ this.ecReplicationConfig = ecReplicationConfig;
+ this.missingContainerIndexes = // private copy of the caller's array
+ Arrays.copyOf(missingContainerIndexes, missingContainerIndexes.length);
+ this.sources = sources;
+ this.targetDatanodes = targetDatanodes;
+ }
+
+ public long getContainerID() {
+ return containerID;
+ }
+
+ public byte[] getMissingContainerIndexes() {
+ return Arrays // fresh copy, so callers cannot alter the stored array
+ .copyOf(missingContainerIndexes, missingContainerIndexes.length);
+ }
+
+ public ECReplicationConfig getEcReplicationConfig() {
+ return ecReplicationConfig;
+ }
+
+ public List<DatanodeDetailsAndReplicaIndex> getSources() {
+ return sources;
+ }
+
+ public List<DatanodeDetails> getTargetDatanodes() {
+ return targetDatanodes;
+ }
+
+ @Override
+ public String toString() {
+ return "ECReconstructionCommandInfo{"
+ + "containerID=" + containerID
+ + ", ecReplicationConfig=" + ecReplicationConfig
+ + ", missingContainerIndexes=" + Arrays
+ .toString(missingContainerIndexes)
+ + ", sources=" + sources
+ + ", targetDatanodes=" + targetDatanodes + '}';
+ }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
new file mode 100644
index 0000000000..24168e5f69
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+/** This is the actual EC reconstruction coordination task. run() is
+ * currently a placeholder: it only lists the planned steps as comments.
+ */
+public class ECReconstructionCoordinatorTask implements Runnable {
+ private final ECReconstructionCommandInfo reconstructionCommandInfo;
+
+ public ECReconstructionCoordinatorTask(
+ ECReconstructionCommandInfo reconstructionCommandInfo) {
+ this.reconstructionCommandInfo = reconstructionCommandInfo;
+ }
+
+ @Override
+ public void run() {
+ // Implement the coordinator logic to handle a container group
+ // reconstruction.
+
+ // 1. Read container block meta info from the available min required good
+ // containers. ( Full block set should be available with 1st or parity
+ // indexes containers)
+ // 2. Find out the total number of blocks
+ // 3. Loop each block and use the ReconstructedInputStreams(HDDS-6665) and
+ // recover.
+ // 4. Write the recovered chunks to given targets/write locally to
+ // respective container. HDDS-6582
+ // 5. Close/finalize the recovered containers.
+ }
+
+ @Override
+ public String toString() {
+ return "ECReconstructionCoordinatorTask{" + "reconstructionCommandInfo="
+ + reconstructionCommandInfo + '}';
+ }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java
new file mode 100644
index 0000000000..e2c930a8c2
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Schedules EC reconstruction tasks on an executor as they arrive; stop()
+ * shuts that executor down.
+ */
+public class ECReconstructionSupervisor {
+
+ private final ContainerSet containerSet;
+ private final StateContext context;
+ private final ExecutorService executor;
+
+ public ECReconstructionSupervisor(ContainerSet containerSet,
+ StateContext context, ExecutorService executor) {
+ this.containerSet = containerSet;
+ this.context = context;
+ this.executor = executor;
+ }
+
+ public ECReconstructionSupervisor(ContainerSet containerSet,
+ StateContext context, int poolSize) {
+ // TODO: ReplicationSupervisor and this class can be refactored to have a
+ // common interface.
+ this(containerSet, context, // fixed-size pool, unbounded queue, daemon threads
+ new ThreadPoolExecutor(poolSize, poolSize, 60, TimeUnit.SECONDS,
+ new LinkedBlockingQueue<>(),
+ new ThreadFactoryBuilder().setDaemon(true)
+ .setNameFormat("ECContainerReconstructionThread-%d").build()));
+ }
+
+ public void stop() {
+ try {
+ executor.shutdown();
+ if (!executor.awaitTermination(3, TimeUnit.SECONDS)) {
+ executor.shutdownNow();
+ }
+ } catch (InterruptedException ie) {
+ executor.shutdownNow(); // interrupted while waiting: still cancel the remaining tasks
+ Thread.currentThread().interrupt();
+ }
+ }
+
+ public void addTask(ECReconstructionCoordinatorTask task) {
+ executor.execute(task);
+ }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/package-info.java
new file mode 100644
index 0000000000..61f4f5c36b
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/package-info.java
@@ -0,0 +1,18 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
\ No newline at end of file
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java
index 17c8a4f3f7..c63c96fa05 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java
@@ -135,7 +135,10 @@ public class ReconstructECContainersCommand
return ecReplicationConfig;
}
- static class DatanodeDetailsAndReplicaIndex {
+ /**
+ * To store the datanode details with replica index.
+ */
+ public static class DatanodeDetailsAndReplicaIndex {
private DatanodeDetails dnDetails;
private int replicaIndex;
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java
new file mode 100644
index 0000000000..e86be82a09
--- /dev/null
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import org.apache.ozone.test.GenericTestUtils;
+import org.junit.Test;
+
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Tests the ECReconstructionSupervisor.
+ */
+public class TestECReconstructionSupervisor {
+
+ private final ECReconstructionSupervisor supervisor =
+ new ECReconstructionSupervisor(null, null, 5);
+
+ @Test
+ public void testAddTaskShouldExecuteTheGivenTask()
+ throws InterruptedException, TimeoutException {
+ FakeTask task = new FakeTask(null);
+ supervisor.addTask(task);
+ GenericTestUtils.waitFor(() -> task.isExecuted, 100, 15000);
+ }
+
+ static class FakeTask extends ECReconstructionCoordinatorTask {
+ private volatile boolean isExecuted = false; // set in run() on an executor thread, read by the waitFor check
+
+ FakeTask(ECReconstructionCommandInfo reconstructionCommandInfo) {
+ super(reconstructionCommandInfo);
+ }
+
+ @Override
+ public void run() {
+ isExecuted = true;
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org