You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ct...@apache.org on 2020/03/05 16:18:15 UTC

[accumulo] branch master updated: Remove miniDFS from HalfDeadTServerIT (#1545)

This is an automated email from the ASF dual-hosted git repository.

ctubbsii pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/master by this push:
     new 5eda683  Remove miniDFS from HalfDeadTServerIT (#1545)
5eda683 is described below

commit 5eda6838f0127129cd67ccd9f76d124a7cc30091
Author: Christopher Tubbs <ct...@apache.org>
AuthorDate: Thu Mar 5 11:18:09 2020 -0500

    Remove miniDFS from HalfDeadTServerIT (#1545)
    
    Remove use of miniDFS from IT, because sometimes miniDFS is slow to
    start up, and it causes the IT to time out. It is not needed for the
    test, since the test case works fine on LocalFileSystem-based DFS.
    
    Use java.util.scanner to clean up boilerplate BufferedReader stuffs.
    
    Suppress some spammy ZK logs in IT log4j2 config.
    
    Add prefix to DumpOutput class in HalfDeadTServerIT to help troubleshoot
    the IT in future.
    
    Build shared library for fake_disk_failure.so only once, before the
    class.
    
    Use ProcessBuilder to connect the IO when building the shared library,
    so any errors from the compiler are shown on the IT's console capture.
    
    Remove compilation warning about unused variable in fake_disk_failure.c
    
    Add comments and make some variables a bit more explicit.
---
 .../test/functional/HalfDeadTServerIT.java         | 142 ++++++++++++---------
 test/src/main/resources/log4j2-test.properties     |   6 +-
 test/src/test/c/fake_disk_failure.c                |   1 -
 3 files changed, 84 insertions(+), 65 deletions(-)

diff --git a/test/src/main/java/org/apache/accumulo/test/functional/HalfDeadTServerIT.java b/test/src/main/java/org/apache/accumulo/test/functional/HalfDeadTServerIT.java
index 20abf04..28ec5fa 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/HalfDeadTServerIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/HalfDeadTServerIT.java
@@ -22,16 +22,18 @@ import static org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeTrue;
 
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Map;
+import java.util.Scanner;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.accumulo.core.client.Accumulo;
 import org.apache.accumulo.core.client.AccumuloClient;
@@ -44,19 +46,36 @@ import org.apache.accumulo.test.TestIngest;
 import org.apache.accumulo.test.VerifyIngest;
 import org.apache.accumulo.tserver.TabletServer;
 import org.apache.hadoop.conf.Configuration;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 
+/**
+ * This test verifies tserver behavior when the node experiences certain failure states that cause
+ * it to be unresponsive to network I/O and/or disk access.
+ *
+ * <p>
+ * This failure state is simulated in a tserver by running that tserver with a shared library that
+ * hooks the read/write system calls. When the shared library sees a specific trigger file, it
+ * pauses the system calls and prints "sleeping", until that file is deleted, after which it proxies
+ * the system call to the real system implementation.
+ *
+ * <p>
+ * In response to failures of the type this test simulates, the tserver should recover if the system
+ * call is stalled for less than the ZooKeeper timeout. Otherwise, it should lose its lock in
+ * ZooKeeper and terminate itself.
+ */
 public class HalfDeadTServerIT extends ConfigurableMacBase {
 
   @Override
   public void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
+    // configure only one tserver from mini; mini won't less us configure 0, so instead, we will
+    // start only 1, and kill it to start our own in the desired simulation environment
     cfg.setNumTservers(1);
     cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "15s");
     cfg.setProperty(Property.GENERAL_RPC_TIMEOUT, "5s");
     cfg.setProperty(Property.TSERV_NATIVEMAP_ENABLED, Boolean.FALSE.toString());
-    cfg.useMiniDFS(true);
   }
 
   @Override
@@ -64,41 +83,63 @@ public class HalfDeadTServerIT extends ConfigurableMacBase {
     return 4 * 60;
   }
 
-  class DumpOutput extends Daemon {
+  private static final AtomicBoolean sharedLibBuilt = new AtomicBoolean(false);
 
-    private final BufferedReader rdr;
-    private final StringBuilder output;
+  @SuppressFBWarnings(value = "COMMAND_INJECTION",
+      justification = "command executed is not from user input")
+  @BeforeClass
+  public static void buildSharedLib() throws IOException, InterruptedException {
+    String root = System.getProperty("user.dir");
+    String source = root + "/src/test/c/fake_disk_failure.c";
+    String lib = root + "/target/fake_disk_failure.so";
+    String platform = System.getProperty("os.name");
+    String[] cmd;
+    if (platform.equals("Darwin")) {
+      cmd = new String[] {"gcc", "-arch", "x86_64", "-arch", "i386", "-dynamiclib", "-O3", "-fPIC",
+          source, "-o", lib};
+    } else {
+      cmd = new String[] {"gcc", "-D_GNU_SOURCE", "-Wall", "-fPIC", source, "-shared", "-o", lib,
+          "-ldl"};
+    }
+    // inherit IO to link see the command's output on the current console
+    Process gcc = new ProcessBuilder(cmd).inheritIO().start();
+    // wait for and record whether the compilation of the native library succeeded
+    sharedLibBuilt.set(gcc.waitFor() == 0);
+  }
+
+  // a simple class to capture a launched process' output (and repeat it back)
+  private static class DumpOutput extends Daemon {
 
-    DumpOutput(InputStream is) {
-      rdr = new BufferedReader(new InputStreamReader(is));
-      output = new StringBuilder();
+    private final Scanner lineScanner;
+    private final StringBuilder capturedOutput;
+    private final PrintStream printer;
+    private final String printerName;
+
+    DumpOutput(InputStream is, PrintStream out, String name) {
+      lineScanner = new Scanner(is);
+      capturedOutput = new StringBuilder();
+      printer = out;
+      printerName = name;
     }
 
     @Override
     public void run() {
-      try {
-        while (true) {
-          String line = rdr.readLine();
-          if (line == null)
-            break;
-          System.out.println(line);
-          output.append(line);
-          output.append("\n");
-        }
-      } catch (IOException ex) {
-        log.error("IOException", ex);
+      while (lineScanner.hasNextLine()) {
+        String line = lineScanner.nextLine();
+        capturedOutput.append(line);
+        capturedOutput.append("\n");
+        printer.printf("%s(%s):%s%n", getClass().getSimpleName(), printerName, line);
       }
     }
 
-    @Override
-    public String toString() {
-      return output.toString();
+    public String getCaptured() {
+      return capturedOutput.toString();
     }
   }
 
   @Test
   public void testRecover() throws Exception {
-    test(10);
+    test(10, false);
   }
 
   @Test
@@ -112,18 +153,13 @@ public class HalfDeadTServerIT extends ConfigurableMacBase {
     }
   }
 
-  public String test(int seconds) throws Exception {
-    return test(seconds, false);
-  }
-
   @SuppressFBWarnings(value = {"PATH_TRAVERSAL_IN", "COMMAND_INJECTION"},
       justification = "path provided by test; command args provided by test")
   public String test(int seconds, boolean expectTserverDied) throws Exception {
-    if (!makeDiskFailureLibrary())
-      return null;
-    try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) {
-      while (c.instanceOperations().getTabletServers().isEmpty()) {
-        // wait until a tserver is running
+    assumeTrue("Shared library did not build", sharedLibBuilt.get());
+    try (AccumuloClient client = Accumulo.newClient().from(getClientProperties()).build()) {
+      while (client.instanceOperations().getTabletServers().isEmpty()) {
+        // wait until the tserver that we need to kill is running
         Thread.sleep(50);
       }
 
@@ -148,16 +184,18 @@ public class HalfDeadTServerIT extends ConfigurableMacBase {
       env.put("DYLD_FORCE_FLAT_NAMESPACE", "true");
       Process ingest = null;
       Process tserver = builder.start();
-      DumpOutput t = new DumpOutput(tserver.getInputStream());
+      DumpOutput stderrCollector = new DumpOutput(tserver.getErrorStream(), System.err, "stderr");
+      DumpOutput stdoutCollector = new DumpOutput(tserver.getInputStream(), System.out, "stdout");
       try {
-        t.start();
+        stderrCollector.start();
+        stdoutCollector.start();
         sleepUninterruptibly(1, TimeUnit.SECONDS);
         // don't need the regular tablet server
         cluster.killProcess(ServerType.TABLET_SERVER,
             cluster.getProcesses().get(ServerType.TABLET_SERVER).iterator().next());
         sleepUninterruptibly(1, TimeUnit.SECONDS);
-        c.tableOperations().create("test_ingest");
-        assertEquals(1, c.instanceOperations().getTabletServers().size());
+        client.tableOperations().create("test_ingest");
+        assertEquals(1, client.instanceOperations().getTabletServers().size());
         int rows = 100 * 1000;
         ingest =
             cluster.exec(TestIngest.class, "-c", cluster.getClientPropsPath(), "--rows", rows + "")
@@ -179,15 +217,16 @@ public class HalfDeadTServerIT extends ConfigurableMacBase {
           assertEquals(0, ingest.waitFor());
           VerifyIngest.VerifyParams params = new VerifyIngest.VerifyParams(getClientProperties());
           params.rows = rows;
-          VerifyIngest.verifyIngest(c, params);
+          VerifyIngest.verifyIngest(client, params);
         } else {
           sleepUninterruptibly(5, TimeUnit.SECONDS);
           tserver.waitFor();
-          t.join();
+          stderrCollector.join();
+          stdoutCollector.join();
           tserver = null;
         }
         // verify the process was blocked
-        String results = t.toString();
+        String results = stdoutCollector.getCaptured();
         assertTrue(results.contains("sleeping\nsleeping\nsleeping\n"));
         return results;
       } finally {
@@ -207,30 +246,11 @@ public class HalfDeadTServerIT extends ConfigurableMacBase {
           } finally {
             tserver.destroy();
             tserver.waitFor();
-            t.join();
+            stderrCollector.join();
+            stdoutCollector.join();
           }
         }
       }
     }
   }
-
-  @SuppressFBWarnings(value = "COMMAND_INJECTION",
-      justification = "command executed is not from user input")
-  private boolean makeDiskFailureLibrary() throws Exception {
-    String root = System.getProperty("user.dir");
-    String source = root + "/src/test/c/fake_disk_failure.c";
-    String lib = root + "/target/fake_disk_failure.so";
-    String platform = System.getProperty("os.name");
-    String[] cmd;
-    if (platform.equals("Darwin")) {
-      cmd = new String[] {"gcc", "-arch", "x86_64", "-arch", "i386", "-dynamiclib", "-O3", "-fPIC",
-          source, "-o", lib};
-    } else {
-      cmd = new String[] {"gcc", "-D_GNU_SOURCE", "-Wall", "-fPIC", source, "-shared", "-o", lib,
-          "-ldl"};
-    }
-    Process gcc = Runtime.getRuntime().exec(cmd);
-    return gcc.waitFor() == 0;
-  }
-
 }
diff --git a/test/src/main/resources/log4j2-test.properties b/test/src/main/resources/log4j2-test.properties
index c0bfa17..9b10a0b 100644
--- a/test/src/main/resources/log4j2-test.properties
+++ b/test/src/main/resources/log4j2-test.properties
@@ -17,9 +17,6 @@
 # under the License.
 #
 
-## Log4j2 file that configures logging for all Accumulo services
-## The system properties referenced below are configured by accumulo-env.sh
-
 status = info
 dest = err
 name = AccumuloITLoggingProperties
@@ -69,6 +66,9 @@ logger.12.level =  info
 logger.13.name = org.apache.zookeeper.ClientCnxn
 logger.13.level = fatal
 
+logger.13b.name = org.apache.zookeeper.ClientCnxnSocket
+logger.13b.level = warn
+
 logger.14.name = org.apache.zookeeper.ZooKeeper
 logger.14.level = warn
 
diff --git a/test/src/test/c/fake_disk_failure.c b/test/src/test/c/fake_disk_failure.c
index ecd644e..ee3a638 100644
--- a/test/src/test/c/fake_disk_failure.c
+++ b/test/src/test/c/fake_disk_failure.c
@@ -25,7 +25,6 @@
 static
 void test_pause() {
   static char trickFile[1024] = "";
-  static char pid[10] = "";
   if (trickFile[0] == '\0') {
     strcpy(trickFile, getenv("TRICK_FILE"));
   }