You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by bh...@apache.org on 2020/09/15 22:08:05 UTC

[hadoop-ozone] branch master updated: HDDS-4075. Retry request on different OM on AccessControlException (#1303)

This is an automated email from the ASF dual-hosted git repository.

bharat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 045aa71  HDDS-4075. Retry request on different OM on AccessControlException (#1303)
045aa71 is described below

commit 045aa7113ccc5fc2804f64870a986dfa5cc91e82
Author: Hanisha Koneru <ha...@apache.org>
AuthorDate: Tue Sep 15 15:07:54 2020 -0700

    HDDS-4075. Retry request on different OM on AccessControlException (#1303)
---
 .../main/java/org/apache/hadoop/ozone/OmUtils.java |  26 +++
 .../ozone/om/ha/OMFailoverProxyProvider.java       | 187 +++++++++++++++++++--
 .../ozone/om/protocolPB/Hadoop3OmTransport.java    | 175 +------------------
 .../hadoop/ozone/om/failover/TestOMFailovers.java  | 152 +++++++++++++++++
 4 files changed, 357 insertions(+), 183 deletions(-)

diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java
index 93e0e7f..2a34580 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java
@@ -17,6 +17,7 @@
 
 package org.apache.hadoop.ozone;
 
+import com.google.protobuf.ServiceException;
 import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
@@ -36,12 +37,15 @@ import java.util.OptionalInt;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.ozone.conf.OMClientConfig;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.token.SecretManager;
 
 import com.google.common.base.Joiner;
 import org.apache.commons.lang3.StringUtils;
@@ -59,6 +63,7 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_INTERNAL_SERVICE_
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_NODES_KEY;
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_PORT_DEFAULT;
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -575,4 +580,25 @@ public final class OmUtils {
       return serviceId;
     }
   }
+
+  /**
+   * Unwrap exception to check if it is some kind of access control problem
+   * ({@link AccessControlException} or {@link SecretManager.InvalidToken}).
+   */
+  public static boolean isAccessControlException(Exception ex) {
+    if (ex instanceof ServiceException) {
+      Throwable t = ex.getCause();
+      if (t instanceof RemoteException) {
+        t = ((RemoteException) t).unwrapRemoteException();
+      }
+      while (t != null) {
+        if (t instanceof AccessControlException ||
+            t instanceof SecretManager.InvalidToken) {
+          return true;
+        }
+        t = t.getCause();
+      }
+    }
+    return false;
+  }
 }
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
index 1abe5ab..acafa7c 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.ozone.om.ha;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.ServiceException;
+
 import java.io.Closeable;
 import java.io.IOException;
 import java.net.InetSocketAddress;
@@ -32,28 +35,31 @@ import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.io.retry.RetryInvocationHandler;
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.ozone.OmUtils;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
+import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException;
 import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB;
 import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB;
 import org.apache.hadoop.security.UserGroupInformation;
 
-import com.google.common.annotations.VisibleForTesting;
-import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY;
+
 /**
  * A failover proxy provider implementation which allows clients to configure
  * multiple OMs to connect to. In case of OM failover, client can try
@@ -80,6 +86,7 @@ public class OMFailoverProxyProvider implements
 
   private final String omServiceId;
 
+  private List<String> retryExceptions = new ArrayList<>();
 
   // OMFailoverProxyProvider, on encountering certain exception, tries each OM
   // once in a round robin fashion. After that it waits for configured time
@@ -90,6 +97,7 @@ public class OMFailoverProxyProvider implements
   private String lastAttemptedOM;
   private int numAttemptsOnSameOM = 0;
   private final long waitBetweenRetries;
+  private Set<String> accessControlExceptionOMs = new HashSet<>();
 
   public OMFailoverProxyProvider(ConfigurationSource configuration,
       UserGroupInformation ugi, String omServiceId) throws IOException {
@@ -108,12 +116,7 @@ public class OMFailoverProxyProvider implements
         OzoneConfigKeys.OZONE_CLIENT_WAIT_BETWEEN_RETRIES_MILLIS_DEFAULT);
   }
 
-  public OMFailoverProxyProvider(OzoneConfiguration configuration,
-      UserGroupInformation ugi) throws IOException {
-    this(configuration, ugi, null);
-  }
-
-  private void loadOMClientConfigs(ConfigurationSource config, String omSvcId)
+  protected void loadOMClientConfigs(ConfigurationSource config, String omSvcId)
       throws IOException {
     this.omProxies = new HashMap<>();
     this.omProxyInfos = new HashMap<>();
@@ -203,7 +206,7 @@ public class OMFailoverProxyProvider implements
   /**
    * Creates proxy object if it does not already exist.
    */
-  private void createOMProxyIfNeeded(ProxyInfo proxyInfo,
+  protected void createOMProxyIfNeeded(ProxyInfo proxyInfo,
       String nodeId) {
     if (proxyInfo.proxy == null) {
       InetSocketAddress address = omProxyInfos.get(nodeId).getAddress();
@@ -223,11 +226,90 @@ public class OMFailoverProxyProvider implements
     }
   }
 
+  @VisibleForTesting
+  public RetryPolicy getRetryPolicy(int maxFailovers) {
+    // Client will attempt upto maxFailovers number of failovers between
+    // available OMs before throwing exception.
+    RetryPolicy retryPolicy = new RetryPolicy() {
+      @Override
+      public RetryAction shouldRetry(Exception exception, int retries,
+          int failovers, boolean isIdempotentOrAtMostOnce)
+          throws Exception {
+
+        if (LOG.isDebugEnabled()) {
+          if (exception.getCause() != null) {
+            LOG.debug("RetryProxy: OM {}: {}: {}", getCurrentProxyOMNodeId(),
+                exception.getCause().getClass().getSimpleName(),
+                exception.getCause().getMessage());
+          } else {
+            LOG.debug("RetryProxy: OM {}: {}", getCurrentProxyOMNodeId(),
+                exception.getMessage());
+          }
+        }
+        retryExceptions.add(getExceptionMsg(exception, failovers));
+
+        if (exception instanceof ServiceException) {
+          OMNotLeaderException notLeaderException =
+              getNotLeaderException(exception);
+          if (notLeaderException != null) {
+            // TODO: NotLeaderException should include the host
+            //  address of the suggested leader along with the nodeID.
+            //  Failing over just based on nodeID is not very robust.
+
+            // OMFailoverProxyProvider#performFailover() is a dummy call and
+            // does not perform any failover. Failover manually to the next OM.
+            performFailoverToNextProxy();
+            return getRetryAction(RetryDecision.FAILOVER_AND_RETRY, failovers);
+          }
+
+          OMLeaderNotReadyException leaderNotReadyException =
+              getLeaderNotReadyException(exception);
+          if (leaderNotReadyException != null) {
+            // Retry on same OM again as leader OM is not ready.
+            // Failing over to same OM so that wait time between retries is
+            // incremented
+            performFailoverIfRequired(getCurrentProxyOMNodeId());
+            return getRetryAction(RetryDecision.FAILOVER_AND_RETRY, failovers);
+          }
+        }
+
+        if (!shouldFailover(exception)) {
+          return RetryAction.FAIL; // do not retry
+        }
+
+        // For all other exceptions, fail over manually to the next OM Node
+        // proxy.
+        performFailoverToNextProxy();
+        return getRetryAction(RetryDecision.FAILOVER_AND_RETRY, failovers);
+      }
+
+      private RetryAction getRetryAction(RetryDecision fallbackAction,
+          int failovers) {
+        if (failovers < maxFailovers) {
+          return new RetryAction(fallbackAction, getWaitTime());
+        } else {
+          StringBuilder allRetryExceptions = new StringBuilder();
+          allRetryExceptions.append("\n");
+          retryExceptions.stream().forEach(e -> allRetryExceptions.append(e)
+              .append("\n"));
+          LOG.error("Failed to connect to OMs: {}. Attempted {} failovers. " +
+                  "Got following exceptions during retries: {}",
+              getOMProxyInfos(), maxFailovers,
+              allRetryExceptions.toString());
+          retryExceptions.clear();
+          return RetryAction.FAIL;
+        }
+      }
+    };
+
+    return retryPolicy;
+  }
+
   public Text getCurrentProxyDelegationToken() {
     return delegationTokenService;
   }
 
-  private Text computeDelegationTokenService() {
+  protected Text computeDelegationTokenService() {
     // For HA, this will return "," separated address of all OM's.
     List<String> addresses = new ArrayList<>();
 
@@ -351,7 +433,7 @@ public class OMFailoverProxyProvider implements
 
   public synchronized long getWaitTime() {
     if (currentProxyOMNodeId.equals(lastAttemptedOM)) {
-      // Clear attemptedOMs list as round robin has been broken. Add only the
+      // Clear attemptedOMs list as round robin has been broken.
       attemptedOMs.clear();
 
       // The same OM will be contacted again. So wait and then retry.
@@ -375,6 +457,23 @@ public class OMFailoverProxyProvider implements
     return waitBetweenRetries;
   }
 
+  public synchronized boolean shouldFailover(Exception ex) {
+    if (OmUtils.isAccessControlException(ex)) {
+      // Retry all available OMs once before failing with
+      // AccessControlException.
+      if (accessControlExceptionOMs.contains(currentProxyOMNodeId)) {
+        accessControlExceptionOMs.clear();
+        return false;
+      } else {
+        accessControlExceptionOMs.add(currentProxyOMNodeId);
+        if (accessControlExceptionOMs.containsAll(omNodeIDList)) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
   /**
    * Close all the proxy objects which have been opened over the lifetime of
    * the proxy provider.
@@ -398,5 +497,69 @@ public class OMFailoverProxyProvider implements
   public List<OMProxyInfo> getOMProxyInfos() {
     return new ArrayList<OMProxyInfo>(omProxyInfos.values());
   }
+
+  private static String getExceptionMsg(Exception e, int retryAttempt) {
+    StringBuilder exceptionMsg = new StringBuilder()
+        .append("Retry Attempt ")
+        .append(retryAttempt)
+        .append(" Exception - ");
+    if (e.getCause() == null) {
+      exceptionMsg.append(e.getClass().getCanonicalName())
+          .append(": ")
+          .append(e.getMessage());
+    } else {
+      exceptionMsg.append(e.getCause().getClass().getCanonicalName())
+          .append(": ")
+          .append(e.getCause().getMessage());
+    }
+    return exceptionMsg.toString();
+  }
+
+  /**
+   * Check if exception is OMLeaderNotReadyException.
+   *
+   * @param exception
+   * @return OMLeaderNotReadyException
+   */
+  private static OMLeaderNotReadyException getLeaderNotReadyException(
+      Exception exception) {
+    Throwable cause = exception.getCause();
+    if (cause instanceof RemoteException) {
+      IOException ioException =
+          ((RemoteException) cause).unwrapRemoteException();
+      if (ioException instanceof OMLeaderNotReadyException) {
+        return (OMLeaderNotReadyException) ioException;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Check if exception is a OMNotLeaderException.
+   *
+   * @return OMNotLeaderException.
+   */
+  public static OMNotLeaderException getNotLeaderException(
+      Exception exception) {
+    Throwable cause = exception.getCause();
+    if (cause instanceof RemoteException) {
+      IOException ioException =
+          ((RemoteException) cause).unwrapRemoteException();
+      if (ioException instanceof OMNotLeaderException) {
+        return (OMNotLeaderException) ioException;
+      }
+    }
+    return null;
+  }
+
+  @VisibleForTesting
+  protected void setProxiesForTesting(
+      Map<String, ProxyInfo<OzoneManagerProtocolPB>> testOMProxies,
+      Map<String, OMProxyInfo> testOMProxyInfos,
+      List<String> testOMNodeIDList) {
+    this.omProxies = testOMProxies;
+    this.omProxyInfos = testOMProxyInfos;
+    this.omNodeIDList = testOMNodeIDList;
+  }
 }
 
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/Hadoop3OmTransport.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/Hadoop3OmTransport.java
index 33d22b4..ea0e534 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/Hadoop3OmTransport.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/Hadoop3OmTransport.java
@@ -18,28 +18,20 @@
 package org.apache.hadoop.ozone.om.protocolPB;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.retry.RetryPolicy;
-import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision;
 import org.apache.hadoop.io.retry.RetryProxy;
 import org.apache.hadoop.ipc.ProtobufHelper;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
-import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
-import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
 import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException;
 import org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse;
-import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.token.SecretManager;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.protobuf.RpcController;
@@ -63,7 +55,6 @@ public class Hadoop3OmTransport implements OmTransport {
   private final OMFailoverProxyProvider omFailoverProxyProvider;
 
   private final OzoneManagerProtocolPB rpcProxy;
-  private List<String> retryExceptions = new ArrayList<>();
 
   public Hadoop3OmTransport(ConfigurationSource conf,
       UserGroupInformation ugi, String omServiceId) throws IOException {
@@ -97,7 +88,8 @@ public class Hadoop3OmTransport implements OmTransport {
       }
       return omResponse;
     } catch (ServiceException e) {
-      OMNotLeaderException notLeaderException = getNotLeaderException(e);
+      OMNotLeaderException notLeaderException =
+          OMFailoverProxyProvider.getNotLeaderException(e);
       if (notLeaderException == null) {
         throw ProtobufHelper.getRemoteException(e);
       }
@@ -118,171 +110,12 @@ public class Hadoop3OmTransport implements OmTransport {
   private OzoneManagerProtocolPB createRetryProxy(
       OMFailoverProxyProvider failoverProxyProvider, int maxFailovers) {
 
-    // Client attempts contacting each OM ipc.client.connect.max.retries
-    // (default = 10) times before failing over to the next OM, if
-    // available.
-    // Client will attempt upto maxFailovers number of failovers between
-    // available OMs before throwing exception.
-    RetryPolicy retryPolicy = new RetryPolicy() {
-      @Override
-      public RetryAction shouldRetry(Exception exception, int retries,
-          int failovers, boolean isIdempotentOrAtMostOnce)
-          throws Exception {
-        if (isAccessControlException(exception)) {
-          return RetryAction.FAIL; // do not retry
-        }
-        if (exception instanceof ServiceException) {
-          OMNotLeaderException notLeaderException =
-              getNotLeaderException(exception);
-          if (notLeaderException != null) {
-            retryExceptions.add(getExceptionMsg(notLeaderException, failovers));
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("RetryProxy: {}", notLeaderException.getMessage());
-            }
-
-            // TODO: NotLeaderException should include the host
-            //  address of the suggested leader along with the nodeID.
-            //  Failing over just based on nodeID is not very robust.
-
-            // OMFailoverProxyProvider#performFailover() is a dummy call and
-            // does not perform any failover. Failover manually to the next OM.
-            omFailoverProxyProvider.performFailoverToNextProxy();
-            return getRetryAction(RetryDecision.FAILOVER_AND_RETRY, failovers);
-          }
-
-          OMLeaderNotReadyException leaderNotReadyException =
-              getLeaderNotReadyException(exception);
-          // As in this case, current OM node is leader, but it is not ready.
-          // OMFailoverProxyProvider#performFailover() is a dummy call and
-          // does not perform any failover.
-          // So Just retry with same OM node.
-          if (leaderNotReadyException != null) {
-            retryExceptions.add(getExceptionMsg(leaderNotReadyException,
-                failovers));
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("RetryProxy: {}", leaderNotReadyException.getMessage());
-            }
-            // HDDS-3465. OM index will not change, but LastOmID will be
-            // updated to currentOMId, so that waitTime calculation will
-            // know lastOmID and currentID are same and need to increment
-            // wait time in between.
-            omFailoverProxyProvider.performFailoverIfRequired(
-                omFailoverProxyProvider.getCurrentProxyOMNodeId());
-            return getRetryAction(RetryDecision.FAILOVER_AND_RETRY, failovers);
-          }
-        }
-
-        // For all other exceptions other than LeaderNotReadyException and
-        // NotLeaderException fail over manually to the next OM Node proxy.
-        // OMFailoverProxyProvider#performFailover() is a dummy call and
-        // does not perform any failover.
-        retryExceptions.add(getExceptionMsg(exception, failovers));
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("RetryProxy: {}", exception.getCause() != null ?
-              exception.getCause().getMessage() : exception.getMessage());
-        }
-        omFailoverProxyProvider.performFailoverToNextProxy();
-        return getRetryAction(RetryDecision.FAILOVER_AND_RETRY, failovers);
-      }
-
-      private RetryAction getRetryAction(RetryDecision fallbackAction,
-          int failovers) {
-        if (failovers < maxFailovers) {
-          return new RetryAction(fallbackAction,
-              omFailoverProxyProvider.getWaitTime());
-        } else {
-          StringBuilder allRetryExceptions = new StringBuilder();
-          allRetryExceptions.append("\n");
-          retryExceptions.stream().forEach(e -> allRetryExceptions.append(e));
-          LOG.error("Failed to connect to OMs: {}. Attempted {} failovers. " +
-                  "Got following exceptions during retries: {}",
-              omFailoverProxyProvider.getOMProxyInfos(), maxFailovers,
-              allRetryExceptions.toString());
-          retryExceptions.clear();
-          return RetryAction.FAIL;
-        }
-      }
-    };
-
     OzoneManagerProtocolPB proxy = (OzoneManagerProtocolPB) RetryProxy.create(
-        OzoneManagerProtocolPB.class, failoverProxyProvider, retryPolicy);
+        OzoneManagerProtocolPB.class, failoverProxyProvider,
+        failoverProxyProvider.getRetryPolicy(maxFailovers));
     return proxy;
   }
 
-  private String getExceptionMsg(Exception e, int retryAttempt) {
-    StringBuilder exceptionMsg = new StringBuilder()
-        .append("Retry Attempt ")
-        .append(retryAttempt)
-        .append(" Exception - ");
-    if (e.getCause() == null) {
-      exceptionMsg.append(e.getClass().getCanonicalName())
-          .append(": ")
-          .append(e.getMessage());
-    } else {
-      exceptionMsg.append(e.getCause().getClass().getCanonicalName())
-          .append(": ")
-          .append(e.getCause().getMessage());
-    }
-    return exceptionMsg.toString();
-  }
-
-  /**
-   * Check if exception is OMLeaderNotReadyException.
-   *
-   * @param exception
-   * @return OMLeaderNotReadyException
-   */
-  private OMLeaderNotReadyException getLeaderNotReadyException(
-      Exception exception) {
-    Throwable cause = exception.getCause();
-    if (cause instanceof RemoteException) {
-      IOException ioException =
-          ((RemoteException) cause).unwrapRemoteException();
-      if (ioException instanceof OMLeaderNotReadyException) {
-        return (OMLeaderNotReadyException) ioException;
-      }
-    }
-    return null;
-  }
-
-  /**
-   * Unwrap exception to check if it is some kind of access control problem
-   * ({@link AccessControlException} or {@link SecretManager.InvalidToken}).
-   */
-  private boolean isAccessControlException(Exception ex) {
-    if (ex instanceof ServiceException) {
-      Throwable t = ex.getCause();
-      if (t instanceof RemoteException) {
-        t = ((RemoteException) t).unwrapRemoteException();
-      }
-      while (t != null) {
-        if (t instanceof AccessControlException ||
-            t instanceof SecretManager.InvalidToken) {
-          return true;
-        }
-        t = t.getCause();
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Check if exception is a OMNotLeaderException.
-   *
-   * @return OMNotLeaderException.
-   */
-  private OMNotLeaderException getNotLeaderException(Exception exception) {
-    Throwable cause = exception.getCause();
-    if (cause instanceof RemoteException) {
-      IOException ioException =
-          ((RemoteException) cause).unwrapRemoteException();
-      if (ioException instanceof OMNotLeaderException) {
-        return (OMNotLeaderException) ioException;
-      }
-    }
-    return null;
-  }
-
   @VisibleForTesting
   public OMFailoverProxyProvider getOmFailoverProxyProvider() {
     return omFailoverProxyProvider;
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
new file mode 100644
index 0000000..860f3ed
--- /dev/null
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om.failover;
+
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.retry.RetryProxy;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider;
+import org.apache.hadoop.ozone.om.ha.OMProxyInfo;
+import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.event.Level;
+
+/**
+ * Tests OM failover protocols using a Mock Failover provider and a Mock OM
+ * Protocol.
+ */
+public class TestOMFailovers {
+
+  private ConfigurationSource conf = new OzoneConfiguration();
+  private Exception testException;
+
+  @Test
+  public void testAccessContorlExceptionFailovers() throws Exception {
+
+    testException = new AccessControlException();
+
+    GenericTestUtils.setLogLevel(OMFailoverProxyProvider.LOG, Level.DEBUG);
+    GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
+        .captureLogs(OMFailoverProxyProvider.LOG);
+
+    MockFailoverProxyProvider failoverProxyProvider =
+        new MockFailoverProxyProvider(conf);
+
+    OzoneManagerProtocolPB proxy = (OzoneManagerProtocolPB) RetryProxy
+        .create(OzoneManagerProtocolPB.class, failoverProxyProvider,
+            failoverProxyProvider.getRetryPolicy(
+                OzoneConfigKeys.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT));
+
+    try {
+      proxy.submitRequest(null, null);
+      Assert.fail("Request should fail with AccessControlException");
+    } catch (Exception ex) {
+      Assert.assertTrue(ex instanceof ServiceException);
+
+      // Request should try all OMs one be one and fail when the last OM also
+      // throws AccessControlException.
+      GenericTestUtils.assertExceptionContains("ServiceException of " +
+          "type class org.apache.hadoop.security.AccessControlException for " +
+          "om3", ex);
+      Assert.assertTrue(ex.getCause() instanceof AccessControlException);
+
+      logCapturer.getOutput().contains(getRetryProxyDebugMsg("om1"));
+      logCapturer.getOutput().contains(getRetryProxyDebugMsg("om2"));
+      logCapturer.getOutput().contains(getRetryProxyDebugMsg("om3"));
+    }
+  }
+
+  private String getRetryProxyDebugMsg(String omNodeId) {
+    return "RetryProxy: OM " + omNodeId + ": AccessControlException: " +
+        "Permission denied.";
+  }
+
+  private final class MockOzoneManagerProtocol
+      implements OzoneManagerProtocolPB {
+
+    private final String omNodeId;
+    // Exception to throw when submitMockRequest is called
+    private final Exception exception;
+
+    private MockOzoneManagerProtocol(String nodeId, Exception ex) {
+      omNodeId = nodeId;
+      exception = ex;
+    }
+
+    @Override
+    public OMResponse submitRequest(RpcController controller,
+        OzoneManagerProtocolProtos.OMRequest request) throws ServiceException {
+      throw new ServiceException("ServiceException of type " +
+          exception.getClass() + " for "+ omNodeId, exception);
+    }
+  }
+
+  private final class MockFailoverProxyProvider
+      extends OMFailoverProxyProvider {
+
+    private MockFailoverProxyProvider(ConfigurationSource configuration)
+        throws IOException {
+      super(configuration, null, null);
+    }
+
+    @Override
+    protected void createOMProxyIfNeeded(ProxyInfo proxyInfo,
+        String nodeId) {
+      if (proxyInfo.proxy == null) {
+        proxyInfo.proxy = new MockOzoneManagerProtocol(nodeId,
+            testException);
+      }
+    }
+
+    @Override
+    protected void loadOMClientConfigs(ConfigurationSource config,
+        String omSvcId) {
+      HashMap<String, ProxyInfo<OzoneManagerProtocolPB>> omProxies =
+          new HashMap<>();
+      HashMap<String, OMProxyInfo> omProxyInfos = new HashMap<>();
+      ArrayList<String> omNodeIDList = new ArrayList<>();
+
+      for (int i = 1; i <= 3; i++) {
+        String nodeId = "om" + i;
+        omProxies.put(nodeId, new ProxyInfo<>(null, nodeId));
+        omProxyInfos.put(nodeId, null);
+        omNodeIDList.add(nodeId);
+      }
+      setProxiesForTesting(omProxies, omProxyInfos, omNodeIDList);
+    }
+
+    @Override
+    protected Text computeDelegationTokenService() {
+      return null;
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: ozone-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: ozone-commits-help@hadoop.apache.org