You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by ha...@apache.org on 2020/06/03 21:42:38 UTC

[hadoop-ozone] branch master updated: HDDS-3586. OM HA can be started with 3 isolated LEADER instead of one OM ring (#925)

This is an automated email from the ASF dual-hosted git repository.

hanishakoneru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 312a977  HDDS-3586. OM HA can be started with 3 isolated LEADER instead of one OM ring (#925)
312a977 is described below

commit 312a97710d55c67d149818a15a13bf636751b459
Author: Hanisha Koneru <ha...@apache.org>
AuthorDate: Wed Jun 3 14:42:29 2020 -0700

    HDDS-3586. OM HA can be started with 3 isolated LEADER instead of one OM ring (#925)
---
 .../ozone/om/TestOzoneManagerConfiguration.java    | 106 +++++++++++++++++++--
 .../org/apache/hadoop/ozone/om/OzoneManager.java   |   2 +-
 .../apache/hadoop/ozone/om/ha/OMHANodeDetails.java |  86 +++++++++--------
 .../apache/hadoop/ozone/om/ha/OMNodeDetails.java   |  18 +++-
 .../ozone/om/ratis/OzoneManagerRatisServer.java    |  13 ++-
 .../hadoop/ozone/shell/keys/CopyKeyHandler.java    |  24 +++--
 6 files changed, 182 insertions(+), 67 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java
index 59c70f9..d66578e 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java
@@ -17,6 +17,15 @@
 
 package org.apache.hadoop.ozone.om;
 
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collection;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
 import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -24,23 +33,17 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.OmUtils;
 import org.apache.hadoop.ozone.OzoneIllegalArgumentException;
+import org.apache.hadoop.ozone.om.ha.OMNodeDetails;
 import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
 import org.apache.hadoop.test.GenericTestUtils;
+
 import org.apache.ratis.protocol.RaftPeer;
 import org.apache.ratis.util.LifeCycle;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.Collection;
-import java.util.UUID;
-import java.util.concurrent.TimeUnit;
 import org.junit.Rule;
+import org.junit.Test;
 import org.junit.rules.Timeout;
 
 /**
@@ -251,6 +254,89 @@ public class TestOzoneManagerConfiguration {
   }
 
   /**
+   * Test an OM HA service where the two peer addresses are unresolvable.
+   * @throws Exception if cluster startup or verification fails
+   */
+  @Test
+  public void testOMHAWithUnresolvedAddresses() throws Exception {
+    // Set the configuration for a 3 node OM service. Only node2 gets an
+    // address that can match the local host (0.0.0.0); node1 and node3 use
+    // example.com hostnames, which the assertions below expect unresolved.
+
+    final String omServiceId = "om-test-unresolved-addresses";
+    final String omNode1Id = "omNode1";
+    final String omNode2Id = "omNode2";
+    final String omNode3Id = "omNode3";
+    final String node1Hostname = "node1.example.com";
+    final String node3Hostname = "node3.example.com";
+
+    String omNodesKeyValue = omNode1Id + "," + omNode2Id + "," + omNode3Id;
+    String omNodesKey = OmUtils.addKeySuffixes(
+        OMConfigKeys.OZONE_OM_NODES_KEY, omServiceId);
+
+    String omNode1RpcAddrKey = getOMAddrKeyWithSuffix(omServiceId, omNode1Id);
+    String omNode2RpcAddrKey = getOMAddrKeyWithSuffix(omServiceId, omNode2Id);
+    String omNode3RpcAddrKey = getOMAddrKeyWithSuffix(omServiceId, omNode3Id);
+
+    String omNode3RatisPortKey = OmUtils.addKeySuffixes(
+        OMConfigKeys.OZONE_OM_RATIS_PORT_KEY, omServiceId, omNode3Id);
+
+    conf.set(OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY, omServiceId);
+    conf.set(omNodesKey, omNodesKeyValue);
+
+    // Set node2 to 0.0.0.0 and the other two nodes to dummy hostnames
+    conf.set(omNode1RpcAddrKey, node1Hostname + ":9862");
+    conf.set(omNode2RpcAddrKey, "0.0.0.0:9862");
+    conf.set(omNode3RpcAddrKey, node3Hostname + ":9804");
+
+    conf.setInt(omNode3RatisPortKey, 9898);
+
+    startCluster();
+    om = cluster.getOzoneManager();
+    omRatisServer = om.getOmRatisServer();
+
+    // Every peer (node1, node3) must be unresolved and have no InetAddress
+    List<OMNodeDetails> peerNodes = om.getPeerNodes();
+    for (OMNodeDetails peerNode : peerNodes) {
+      Assert.assertTrue(peerNode.isHostUnresolved());
+      Assert.assertNull(peerNode.getInetAddress());
+    }
+
+    Assert.assertEquals(LifeCycle.State.RUNNING, om.getOmRatisServerState());
+
+    // OM's Ratis server should have 3 peers in its RaftGroup
+    Collection<RaftPeer> peers = omRatisServer.getRaftGroup().getPeers();
+    Assert.assertEquals(3, peers.size());
+
+    // Ratis server RaftPeerId should match with omNode2 ID as node2 is the
+    // only node configured with an address that is local to this host
+    Assert.assertEquals(omNode2Id, omRatisServer.getRaftPeerId().toString());
+
+    // Verify the Ratis address of every peer in the RaftGroup
+    for (RaftPeer peer : peers) {
+      String expectedPeerAddress = null;
+
+      switch (peer.getId().toString()) {
+      case omNode1Id :
+        // Ratis port is not set for node1. So it should take the default port
+        expectedPeerAddress = node1Hostname + ":" +
+            OMConfigKeys.OZONE_OM_RATIS_PORT_DEFAULT;
+        break;
+      case omNode2Id :
+        expectedPeerAddress = "0.0.0.0:"+
+            OMConfigKeys.OZONE_OM_RATIS_PORT_DEFAULT;
+        break;
+      case omNode3Id :
+        // Ratis port is explicitly set to 9898 for node3 (see above)
+        expectedPeerAddress = node3Hostname + ":9898";
+        break;
+      default : Assert.fail("Unrecognized RaftPeerId");
+      }
+      Assert.assertEquals(expectedPeerAddress, peer.getAddress());
+    }
+  }
+
+  /**
    * Test a wrong configuration for OM HA. A configuration with none of the
    * OM addresses matching the local address should throw an error.
    * @throws Exception
@@ -335,7 +421,7 @@ public class TestOzoneManagerConfiguration {
     } catch (OzoneIllegalArgumentException e) {
       // Expect error message
       Assert.assertTrue(e.getMessage().contains(
-          "OM Rpc Address should be set for all node"));
+          "OM RPC Address should be set for all node"));
     }
   }
 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index faa001d..5658b53 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -2467,7 +2467,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
       for (OMNodeDetails peerNode : peerNodes) {
         ServiceInfo.Builder peerOmServiceInfoBuilder = ServiceInfo.newBuilder()
             .setNodeType(HddsProtos.NodeType.OM)
-            .setHostname(peerNode.getAddress().getHostName())
+            .setHostname(peerNode.getHostName())
             .addServicePort(ServicePort.newBuilder()
                 .setType(ServicePort.Type.RPC)
                 .setValue(peerNode.getRpcPort())
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMHANodeDetails.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMHANodeDetails.java
index ea78302..75bfd7e 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMHANodeDetails.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMHANodeDetails.java
@@ -119,10 +119,10 @@ public class OMHANodeDetails {
       Collection<String> omNodeIds = OmUtils.getOMNodeIds(conf, serviceId);
 
       if (omNodeIds.size() == 0) {
-        String msg = "Configuration does not have any value set for " +
-            OZONE_OM_NODES_KEY + " for service ID " + serviceId + ". List of " +
-            "OM Node ID's should be specified for the service ID";
-        throw new OzoneIllegalArgumentException(msg);
+        throwConfException("Configuration does not have any value set for %s " +
+            "for the service %s. List of OM Node ID's should be specified " +
+            "for an OM service", OZONE_OM_NODES_KEY, serviceId);
+        return null;
       }
 
       List<OMNodeDetails> peerNodesList = new ArrayList<>();
@@ -137,10 +137,10 @@ public class OMHANodeDetails {
             serviceId, nodeId);
         String rpcAddrStr = OmUtils.getOmRpcAddress(conf, rpcAddrKey);
         if (rpcAddrStr == null || rpcAddrStr.isEmpty()) {
-          String msg = "Configuration does not have any value set for " +
-              rpcAddrKey + "." + "OM Rpc Address should be set for all node " +
-              "IDs for a service ID.";
-          throw new OzoneIllegalArgumentException(msg);
+          throwConfException("Configuration does not have any value set for " +
+              "%s. OM RPC Address should be set for all nodes in an OM " +
+              "service.", rpcAddrKey);
+          return null;
         }
 
         // If OM address is set for any node id, we will not fallback to the
@@ -155,24 +155,30 @@ public class OMHANodeDetails {
         try {
           addr = NetUtils.createSocketAddr(rpcAddrStr);
         } catch (Exception e) {
-          LOG.warn("Exception in creating socket address " + addr, e);
-          continue;
+          LOG.error("Couldn't create socket address for OM {} : {}", nodeId,
+              rpcAddrStr, e);
+          throw e;
         }
-        if (!addr.isUnresolved()) {
-          if (!isPeer && OmUtils.isAddressLocal(addr)) {
-            localRpcAddress = addr;
-            localOMServiceId = serviceId;
-            localOMNodeId = nodeId;
-            localRatisPort = ratisPort;
-            found++;
-          } else {
-            // This OMNode belongs to same OM service as the current OMNode.
-            // Add it to peerNodes list.
-            // This OMNode belongs to same OM service as the current OMNode.
-            // Add it to peerNodes list.
-            peerNodesList.add(getHAOMNodeDetails(conf, serviceId,
-                nodeId, addr, ratisPort));
-          }
+
+        if (addr.isUnresolved()) {
+          LOG.error("Address for OM {} : {} couldn't be resolved. Proceeding " +
+                  "with unresolved host to create Ratis ring.", nodeId,
+              rpcAddrStr);
+        }
+
+        if (!addr.isUnresolved() && !isPeer && OmUtils.isAddressLocal(addr)) {
+          localRpcAddress = addr;
+          localOMServiceId = serviceId;
+          localOMNodeId = nodeId;
+          localRatisPort = ratisPort;
+          found++;
+        } else {
+          // This OMNode belongs to same OM service as the current OMNode.
+          // Add it to peerNodes list.
+          // A node whose address could not be resolved also lands here, so
+          // that it is still included when the Ratis ring is created.
+          peerNodesList.add(getHAOMNodeDetails(conf, serviceId,
+              nodeId, addr, ratisPort));
         }
       }
       if (found == 1) {
@@ -190,11 +196,11 @@ public class OMHANodeDetails {
             localOMNodeId, localRpcAddress, localRatisPort), peerNodesList);
 
       } else if (found > 1) {
-        String msg = "Configuration has multiple " + OZONE_OM_ADDRESS_KEY +
-            " addresses that match local node's address. Please configure the" +
-            " system with " + OZONE_OM_SERVICE_IDS_KEY + " and " +
-            OZONE_OM_ADDRESS_KEY;
-        throw new OzoneIllegalArgumentException(msg);
+        throwConfException("Configuration has multiple %s addresses that " +
+            "match local node's address. Please configure the system with %s " +
+            "and %s", OZONE_OM_ADDRESS_KEY, OZONE_OM_SERVICE_IDS_KEY,
+            OZONE_OM_ADDRESS_KEY);
+        return null;
       }
     }
 
@@ -211,11 +217,9 @@ public class OMHANodeDetails {
           null, omAddress, ratisPort), new ArrayList<>());
 
     } else {
-      String msg = "Configuration has no " + OZONE_OM_ADDRESS_KEY + " " +
-          "address that matches local node's address. Please configure the " +
-          "system with " + OZONE_OM_ADDRESS_KEY;
-      LOG.info(msg);
-      throw new OzoneIllegalArgumentException(msg);
+      throwConfException("Configuration has no %s address that matches local " +
+          "node's address.", OZONE_OM_ADDRESS_KEY);
+      return null;
     }
   }
 
@@ -238,7 +242,6 @@ public class OMHANodeDetails {
           serviceId);
     }
 
-
     // We need to pass null for serviceID and nodeID as this is set for
     // non-HA cluster. This means one node OM cluster.
     String httpAddr = OmUtils.getHttpAddressForOMPeerNode(conf,
@@ -254,10 +257,8 @@ public class OMHANodeDetails {
         .setHttpAddress(httpAddr)
         .setHttpsAddress(httpsAddr)
         .build();
-
   }
 
-
   /**
    * Create Local OM Node Details.
    * @param serviceId - Service ID this OM belongs to,
@@ -285,10 +286,8 @@ public class OMHANodeDetails {
         .setHttpAddress(httpAddr)
         .setHttpsAddress(httpsAddr)
         .build();
-
   }
 
-
   /**
    * Check if any of the following configuration keys have been set using OM
    * Node ID suffixed to the key. If yes, then set the base key with the
@@ -319,5 +318,10 @@ public class OMHANodeDetails {
     }
   }
 
-
+  private static void throwConfException(String message, String... arguments)
+      throws IllegalArgumentException { // always throws; never returns normally
+    String exceptionMsg = String.format(message, arguments); // message is a format pattern
+    LOG.error(exceptionMsg); // log the formatted text before throwing it
+    throw new OzoneIllegalArgumentException(exceptionMsg); // NOTE(review): presumably an IllegalArgumentException subtype - confirm
+  }
 }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMNodeDetails.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMNodeDetails.java
index 7ed666b..b00191d 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMNodeDetails.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ha/OMNodeDetails.java
@@ -129,10 +129,26 @@ public final class OMNodeDetails {
     return rpcAddress;
   }
 
-  public InetAddress getAddress() {
+  public boolean isHostUnresolved() { // delegates to rpcAddress.isUnresolved()
+    return rpcAddress.isUnresolved();
+  }
+
+  public InetAddress getInetAddress() { // NOTE(review): presumably null for an unresolved host - confirm
     return rpcAddress.getAddress();
   }
 
+  public String getHostName() { // host name as reported by rpcAddress
+    return rpcAddress.getHostName();
+  }
+
+  public String getRatisHostPortStr() { // builds "<hostName>:<ratisPort>"
+    StringBuilder hostPort = new StringBuilder();
+    hostPort.append(getHostName())
+        .append(":")
+        .append(ratisPort);
+    return hostPort.toString();
+  }
+
   public int getRatisPort() {
     return ratisPort;
   }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index 359ab1f..191ab00 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -307,7 +307,7 @@ public final class OzoneManagerRatisServer {
     RaftPeerId localRaftPeerId = RaftPeerId.getRaftPeerId(omNodeId);
 
     InetSocketAddress ratisAddr = new InetSocketAddress(
-        omNodeDetails.getAddress(), omNodeDetails.getRatisPort());
+        omNodeDetails.getInetAddress(), omNodeDetails.getRatisPort());
 
     RaftPeer localRaftPeer = new RaftPeer(localRaftPeerId, ratisAddr);
 
@@ -317,10 +317,15 @@ public final class OzoneManagerRatisServer {
 
     for (OMNodeDetails peerInfo : peerNodes) {
       String peerNodeId = peerInfo.getOMNodeId();
-      InetSocketAddress peerRatisAddr = new InetSocketAddress(
-          peerInfo.getAddress(), peerInfo.getRatisPort());
       RaftPeerId raftPeerId = RaftPeerId.valueOf(peerNodeId);
-      RaftPeer raftPeer = new RaftPeer(raftPeerId, peerRatisAddr);
+      RaftPeer raftPeer;
+      if (peerInfo.isHostUnresolved()) {
+        raftPeer = new RaftPeer(raftPeerId, peerInfo.getRatisHostPortStr());
+      } else {
+        InetSocketAddress peerRatisAddr = new InetSocketAddress(
+            peerInfo.getInetAddress(), peerInfo.getRatisPort());
+        raftPeer = new RaftPeer(raftPeerId, peerRatisAddr);
+      }
 
       // Add other OM nodes belonging to the same OM service to the Ratis ring
       raftPeers.add(raftPeer);
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/CopyKeyHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/CopyKeyHandler.java
index 228d957..1437f96 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/CopyKeyHandler.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/CopyKeyHandler.java
@@ -17,23 +17,24 @@
  */
 package org.apache.hadoop.ozone.shell.keys;
 
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.hadoop.conf.StorageUnit;
 import org.apache.hadoop.hdds.client.ReplicationFactor;
 import org.apache.hadoop.hdds.client.ReplicationType;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.ozone.OzoneConsts;
-import org.apache.hadoop.ozone.client.*;
+import org.apache.hadoop.ozone.client.OzoneBucket;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientException;
+import org.apache.hadoop.ozone.client.OzoneKeyDetails;
+import org.apache.hadoop.ozone.client.OzoneVolume;
 import org.apache.hadoop.ozone.shell.OzoneAddress;
 import org.apache.hadoop.ozone.shell.bucket.BucketHandler;
-import picocli.CommandLine.Command;
-import picocli.CommandLine.Option;
-import picocli.CommandLine.Parameters;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.HashMap;
-import java.util.Map;
 
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY;
@@ -41,6 +42,9 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION_DEFAULT;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION_TYPE;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION_TYPE_DEFAULT;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+import picocli.CommandLine.Parameters;
 
 /**
  * Copy an existing key to another one within the same bucket.


---------------------------------------------------------------------
To unsubscribe, e-mail: ozone-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: ozone-commits-help@hadoop.apache.org