You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uniffle.apache.org by ro...@apache.org on 2022/08/29 04:00:26 UTC

[incubator-uniffle] branch master updated: Fix flaky test about kerberos (#191)

This is an automated email from the ASF dual-hosted git repository.

roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new b46f7c23 Fix flaky test about kerberos (#191)
b46f7c23 is described below

commit b46f7c23b6e348e9cecbf64aa71b877b62510705
Author: Junfan Zhang <ju...@outlook.com>
AuthorDate: Mon Aug 29 12:00:21 2022 +0800

    Fix flaky test about kerberos (#191)
    
    ### What changes were proposed in this pull request?
    Introduce a retry mechanism for when the MiniDFS cluster startup encounters a BindException.
    
    ### Why are the changes needed?
    To fix the flaky Kerberos test.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Not necessary
---
 .../org/apache/uniffle/common/util/RetryUtils.java |  5 ++
 .../org/apache/uniffle/common/KerberizedHdfs.java  | 54 +++++++++++++---------
 2 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java b/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
index 889d459c..03f817ff 100644
--- a/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
+++ b/common/src/main/java/org/apache/uniffle/common/util/RetryUtils.java
@@ -31,6 +31,11 @@ public class RetryUtils {
     return retry(cmd, null, intervalMs, retryTimes, null);
   }
 
+  public static <T> T retry(RetryCmd<T> cmd, long intervalMs, int retryTimes,
+      Set<Class> exceptionClasses) throws Throwable {
+    return retry(cmd, null, intervalMs, retryTimes, exceptionClasses);
+  }
+
   /**
    * @param cmd              command to execute
    * @param callBack         the callback command executed when the attempt of command fail
diff --git a/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java b/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
index 729be6c6..4275e113 100644
--- a/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
+++ b/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Serializable;
+import java.net.BindException;
 import java.net.ServerSocket;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -30,6 +31,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;
 
+import com.google.common.collect.Sets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -48,6 +50,7 @@ import org.apache.hadoop.security.ssl.KeyStoreTestUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.uniffle.common.util.RetryUtils;
 import org.apache.uniffle.common.util.RssUtils;
 
 import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY;
@@ -92,7 +95,11 @@ public class KerberizedHdfs implements Serializable {
     kerberizedDfsBaseDir = Files.createTempDirectory("kerberizedDfsBaseDir").toFile().toPath();
 
     startKDC();
-    startKerberizedDFS();
+    try {
+      startKerberizedDFS();
+    } catch (Throwable t) {
+      throw new Exception(t);
+    }
     setupDFSData();
   }
 
@@ -158,7 +165,7 @@ public class KerberizedHdfs implements Serializable {
     return conf;
   }
 
-  private void startKerberizedDFS() throws Exception {
+  private void startKerberizedDFS() throws Throwable {
     String krb5Conf = kdc.getKrb5conf().getAbsolutePath();
     System.setProperty("java.security.krb5.conf", krb5Conf);
 
@@ -181,26 +188,29 @@ public class KerberizedHdfs implements Serializable {
     hdfsConf.set("hadoop.proxyuser.hdfs.groups", "*");
     hdfsConf.set("hadoop.proxyuser.hdfs.users", "*");
 
-    List<Integer> ports = findAvailablePorts(5);
-    LOGGER.info("Find available ports: {}", ports);
-
-    hdfsConf.set("dfs.datanode.ipc.address", "0.0.0.0:" + ports.get(0));
-    hdfsConf.set("dfs.datanode.address", "0.0.0.0:" + ports.get(1));
-    hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(2));
-    hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(3));
-
-    kerberizedDfsCluster = ugi.doAs(new PrivilegedExceptionAction<MiniDFSCluster>() {
-      @Override
-      public MiniDFSCluster run() throws Exception {
-        return new MiniDFSCluster
-            .Builder(hdfsConf)
-            .nameNodePort(ports.get(4))
-            .numDataNodes(1)
-            .clusterId("kerberized-cluster-1")
-            .checkDataNodeAddrConfig(true)
-            .build();
-      }
-    });
+    this.kerberizedDfsCluster = RetryUtils.retry(() -> {
+      List<Integer> ports = findAvailablePorts(5);
+      LOGGER.info("Find available ports: {}", ports);
+
+      hdfsConf.set("dfs.datanode.ipc.address", "0.0.0.0:" + ports.get(0));
+      hdfsConf.set("dfs.datanode.address", "0.0.0.0:" + ports.get(1));
+      hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(2));
+      hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(3));
+
+      return ugi.doAs(new PrivilegedExceptionAction<MiniDFSCluster>() {
+
+        @Override
+        public MiniDFSCluster run() throws Exception {
+          return new MiniDFSCluster
+              .Builder(hdfsConf)
+              .nameNodePort(ports.get(4))
+              .numDataNodes(1)
+              .clusterId("kerberized-cluster-1")
+              .checkDataNodeAddrConfig(true)
+              .build();
+        }
+      });
+    }, 1000L, 5, Sets.newHashSet(BindException.class));
   }
 
   private void startKDC() throws Exception {