You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by sy...@apache.org on 2017/04/17 20:55:15 UTC

[36/50] [abbrv] hbase git commit: HBASE-16775 Fix flaky TestExportSnapshot#testExportRetry.

HBASE-16775 Fix flaky TestExportSnapshot#testExportRetry.

Reason for flakyness: Current test is probability based fault injection and triggers failure 3% of the time. Earlier when test used LocalJobRunner which didn't honor "mapreduce.map.maxattempts", it'd pass 97% time (when no fault is injected) and fail 3% time (when fault was injected). Point being, even when the test was complete wrong, we couldn't catch it because it was probability based.

This change will inject fault in a deterministic manner.
On design side, it encapsulates all testing hooks in ExportSnapshot.java into single inner class.

Change-Id: Icba866e1d56a5281748df89f4dd374bc45bad249


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/da5fb27e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/da5fb27e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/da5fb27e

Branch: refs/heads/hbase-12439
Commit: da5fb27eabed4a4b4d251be973ee945fb52895bf
Parents: cf3215d
Author: Apekshit Sharma <ap...@apache.org>
Authored: Thu Oct 6 14:20:58 2016 -0700
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Wed Apr 12 11:11:31 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/snapshot/ExportSnapshot.java   | 58 +++++++-------
 .../hbase/snapshot/TestExportSnapshot.java      | 84 +++++++++++---------
 .../snapshot/TestExportSnapshotNoCluster.java   |  2 +-
 .../hbase/snapshot/TestMobExportSnapshot.java   |  7 +-
 .../snapshot/TestMobSecureExportSnapshot.java   |  7 +-
 .../snapshot/TestSecureExportSnapshot.java      |  7 +-
 6 files changed, 93 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index e2086e9..e3ad951 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -29,7 +29,6 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Random;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.Option;
@@ -110,9 +109,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
   private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
   protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
 
-  static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
-  static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";
-
+  static class Testing {
+    static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
+    static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
+    int failuresCountToInject = 0;
+    int injectedFailureCount = 0;
+  }
 
   // Command line options and defaults.
   static final class Options {
@@ -149,12 +151,10 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
 
   private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
                                                    NullWritable, NullWritable> {
+    private static final Log LOG = LogFactory.getLog(ExportMapper.class);
     final static int REPORT_SIZE = 1 * 1024 * 1024;
     final static int BUFFER_SIZE = 64 * 1024;
 
-    private boolean testFailures;
-    private Random random;
-
     private boolean verifyChecksum;
     private String filesGroup;
     private String filesUser;
@@ -169,9 +169,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
     private Path inputArchive;
     private Path inputRoot;
 
+    private static Testing testing = new Testing();
+
     @Override
     public void setup(Context context) throws IOException {
       Configuration conf = context.getConfiguration();
+
       Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
       Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
 
@@ -186,8 +189,6 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
       inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
       outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
 
-      testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
-
       try {
         srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
         inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
@@ -210,6 +211,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
       for (Counter c : Counter.values()) {
         context.getCounter(c).increment(0);
       }
+      if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
+        testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
+        // Get number of times we have already injected failure based on attempt number of this
+        // task.
+        testing.injectedFailureCount = context.getTaskAttemptID().getId();
+      }
     }
 
     @Override
@@ -251,35 +258,23 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
       return new Path(outputArchive, path);
     }
 
-    /*
-     * Used by TestExportSnapshot to simulate a failure
+    /**
+     * Used by TestExportSnapshot to test for retries when failures happen.
+     * Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
      */
     private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
         throws IOException {
-      if (testFailures) {
-        if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
-          if (random == null) {
-            random = new Random();
-          }
-
-          // FLAKY-TEST-WARN: lower is better, we can get some runs without the
-          // retry, but at least we reduce the number of test failures due to
-          // this test exception from the same map task.
-          if (random.nextFloat() < 0.03) {
-            throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
-                                  + " time=" + System.currentTimeMillis());
-          }
-        } else {
-          context.getCounter(Counter.COPY_FAILED).increment(1);
-          throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
-        }
-      }
+      if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
+      if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
+      testing.injectedFailureCount++;
+      context.getCounter(Counter.COPY_FAILED).increment(1);
+      LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
+      throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
+          testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
     }
 
     private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
         final Path outputPath) throws IOException {
-      injectTestFailure(context, inputInfo);
-
       // Get the file information
       FileStatus inputStat = getSourceFileStatus(context, inputInfo);
 
@@ -318,6 +313,7 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
         }
       } finally {
         in.close();
+        injectTestFailure(context, inputInfo);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index 1beb518..52412d9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.snapshot;
 import static org.apache.hadoop.util.ToolRunner.run;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
 
 import java.io.IOException;
 import java.net.URI;
@@ -44,12 +45,10 @@ import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
 import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.util.ToolRunner;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -57,6 +56,7 @@ import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
 import org.junit.rules.TestRule;
 
 /**
@@ -72,6 +72,9 @@ public class TestExportSnapshot {
 
   protected final static byte[] FAMILY = Bytes.toBytes("cf");
 
+  @Rule
+  public final TestName testName = new TestName();
+
   protected TableName tableName;
   private byte[] emptySnapshotName;
   private byte[] snapshotName;
@@ -85,17 +88,27 @@ public class TestExportSnapshot {
     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
     conf.setBoolean("hbase.master.enabletable.roundrobin", true);
     conf.setInt("mapreduce.map.maxattempts", 10);
+    // If a single node has enough failures (default 3), resource manager will blacklist it.
+    // With only 2 nodes and tests injecting faults, we don't want that.
+    conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
   }
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     setUpBaseConf(TEST_UTIL.getConfiguration());
-    TEST_UTIL.startMiniCluster(3);
+    // Setup separate test-data directory for MR cluster and set corresponding configurations.
+    // Otherwise, different test classes running MR cluster can step on each other.
+    TEST_UTIL.getDataTestDir();
+    TEST_UTIL.startMiniZKCluster();
+    TEST_UTIL.startMiniMapReduceCluster();
+    TEST_UTIL.startMiniHBaseCluster(1, 3);
   }
 
   @AfterClass
   public static void tearDownAfterClass() throws Exception {
-    TEST_UTIL.shutdownMiniCluster();
+    TEST_UTIL.shutdownMiniHBaseCluster();
+    TEST_UTIL.shutdownMiniMapReduceCluster();
+    TEST_UTIL.shutdownMiniZKCluster();
   }
 
   /**
@@ -105,10 +118,9 @@ public class TestExportSnapshot {
   public void setUp() throws Exception {
     this.admin = TEST_UTIL.getAdmin();
 
-    long tid = System.currentTimeMillis();
-    tableName = TableName.valueOf("testtb-" + tid);
-    snapshotName = Bytes.toBytes("snaptb0-" + tid);
-    emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + tid);
+    tableName = TableName.valueOf("testtb-" + testName.getMethodName());
+    snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
+    emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
 
     // create Table
     createTable();
@@ -191,16 +203,16 @@ public class TestExportSnapshot {
       Path copyDir, boolean overwrite) throws Exception {
     testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
       filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
-      overwrite, getBypassRegionPredicate());
+      overwrite, getBypassRegionPredicate(), true);
   }
 
   /**
-   * Test ExportSnapshot
+   * Creates destination directory, runs ExportSnapshot() tool, and runs some verifications.
    */
   protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
       final byte[] snapshotName, final byte[] targetName, final int filesExpected,
       final Path sourceDir, Path copyDir, final boolean overwrite,
-      final RegionPredicate bypassregionPredicate) throws Exception {
+      final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
     URI hdfsUri = FileSystem.get(conf).getUri();
     FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
     copyDir = copyDir.makeQualified(fs);
@@ -218,7 +230,12 @@ public class TestExportSnapshot {
 
     // Export Snapshot
     int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
-    assertEquals(0, res);
+    assertEquals(success ? 0 : 1, res);
+    if (!success) {
+      final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
+      assertFalse(fs.exists(new Path(copyDir, targetDir)));
+      return;
+    }
 
     // Verify File-System state
     FileStatus[] rootFiles = fs.listStatus(copyDir);
@@ -242,42 +259,35 @@ public class TestExportSnapshot {
   }
 
   /**
-   * Check that ExportSnapshot will return a failure if something fails.
+   * Check that ExportSnapshot will succeed if something fails but the retry succeed.
    */
   @Test
-  public void testExportFailure() throws Exception {
-    assertEquals(1, runExportAndInjectFailures(snapshotName, false));
+  public void testExportRetry() throws Exception {
+    Path copyDir = getLocalDestinationDir();
+    FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+    copyDir = copyDir.makeQualified(fs);
+    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+    conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+    conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
+    conf.setInt("mapreduce.map.maxattempts", 3);
+    testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+        TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
   }
 
   /**
-   * Check that ExportSnapshot will succede if something fails but the retry succede.
+   * Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
    */
   @Test
-  public void testExportRetry() throws Exception {
-    assertEquals(0, runExportAndInjectFailures(snapshotName, true));
-  }
-
-  /*
-   * Execute the ExportSnapshot job injecting failures
-   */
-  private int runExportAndInjectFailures(final byte[] snapshotName, boolean retry)
-      throws Exception {
+  public void testExportFailure() throws Exception {
     Path copyDir = getLocalDestinationDir();
-    URI hdfsUri = FileSystem.get(TEST_UTIL.getConfiguration()).getUri();
     FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
     copyDir = copyDir.makeQualified(fs);
-
     Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
-    conf.setBoolean(ExportSnapshot.CONF_TEST_FAILURE, true);
-    conf.setBoolean(ExportSnapshot.CONF_TEST_RETRY, retry);
-    if (!retry) {
-      conf.setInt("mapreduce.map.maxattempts", 3);
-    }
-    // Export Snapshot
-    Path sourceDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
-    String[] args = new String[] { "--snapshot", Bytes.toString(snapshotName),
-        "--copy-from", sourceDir.toString(), "--copy-to", copyDir.toString() };
-    return ToolRunner.run(conf, new ExportSnapshot(), args);
+    conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+    conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
+    conf.setInt("mapreduce.map.maxattempts", 3);
+    testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+        TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
   }
 
   /*

http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
index e2d7c11..cd5ff6c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
@@ -104,7 +104,7 @@ public class TestExportSnapshotNoCluster {
     TableName tableName = builder.getTableDescriptor().getTableName();
     TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
       tableName, snapshotName, snapshotName, snapshotFilesCount,
-      testDir, getDestinationDir(), false, null);
+      testDir, getDestinationDir(), false, null, true);
   }
 
   private Path getDestinationDir() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
index c375b0a..55686b1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
@@ -45,7 +45,12 @@ public class TestMobExportSnapshot extends TestExportSnapshot {
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     setUpBaseConf(TEST_UTIL.getConfiguration());
-    TEST_UTIL.startMiniCluster(3);
+    // Setup separate test-data directory for MR cluster and set corresponding configurations.
+    // Otherwise, different test classes running MR cluster can step on each other.
+    TEST_UTIL.getDataTestDir();
+    TEST_UTIL.startMiniZKCluster();
+    TEST_UTIL.startMiniMapReduceCluster();
+    TEST_UTIL.startMiniHBaseCluster(1, 3);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
index 8154995..c0f31d5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
@@ -37,6 +37,9 @@ public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     setUpBaseConf(TEST_UTIL.getConfiguration());
+    // Setup separate test-data directory for MR cluster and set corresponding configurations.
+    // Otherwise, different test classes running MR cluster can step on each other.
+    TEST_UTIL.getDataTestDir();
 
     // set the always on security provider
     UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
@@ -45,7 +48,9 @@ public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
     // setup configuration
     SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
 
-    TEST_UTIL.startMiniCluster(3);
+    TEST_UTIL.startMiniZKCluster();
+    TEST_UTIL.startMiniMapReduceCluster();
+    TEST_UTIL.startMiniHBaseCluster(1, 3);
 
     // Wait for the ACL table to become available
     TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);

http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
index f335e4e..4d35b71 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
@@ -42,6 +42,9 @@ public class TestSecureExportSnapshot extends TestExportSnapshot {
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     setUpBaseConf(TEST_UTIL.getConfiguration());
+    // Setup separate test-data directory for MR cluster and set corresponding configurations.
+    // Otherwise, different test classes running MR cluster can step on each other.
+    TEST_UTIL.getDataTestDir();
 
     // set the always on security provider
     UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
@@ -50,7 +53,9 @@ public class TestSecureExportSnapshot extends TestExportSnapshot {
     // setup configuration
     SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
 
-    TEST_UTIL.startMiniCluster(3);
+    TEST_UTIL.startMiniZKCluster();
+    TEST_UTIL.startMiniMapReduceCluster();
+    TEST_UTIL.startMiniHBaseCluster(1, 3);
 
     // Wait for the ACL table to become available
     TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);