You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by sy...@apache.org on 2017/04/17 20:55:15 UTC
[36/50] [abbrv] hbase git commit: HBASE-16775 Fix flaky
TestExportSnapshot#testExportRetry.
HBASE-16775 Fix flaky TestExportSnapshot#testExportRetry.
Reason for flakyness: Current test is probability based fault injection and triggers failure 3% of the time. Earlier when test used LocalJobRunner which didn't honor "mapreduce.map.maxattempts", it'd pass 97% time (when no fault is injected) and fail 3% time (when fault was injected). Point being, even when the test was complete wrong, we couldn't catch it because it was probability based.
This change will inject fault in a deterministic manner.
On design side, it encapsulates all testing hooks in ExportSnapshot.java into single inner class.
Change-Id: Icba866e1d56a5281748df89f4dd374bc45bad249
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/da5fb27e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/da5fb27e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/da5fb27e
Branch: refs/heads/hbase-12439
Commit: da5fb27eabed4a4b4d251be973ee945fb52895bf
Parents: cf3215d
Author: Apekshit Sharma <ap...@apache.org>
Authored: Thu Oct 6 14:20:58 2016 -0700
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Wed Apr 12 11:11:31 2017 -0700
----------------------------------------------------------------------
.../hadoop/hbase/snapshot/ExportSnapshot.java | 58 +++++++-------
.../hbase/snapshot/TestExportSnapshot.java | 84 +++++++++++---------
.../snapshot/TestExportSnapshotNoCluster.java | 2 +-
.../hbase/snapshot/TestMobExportSnapshot.java | 7 +-
.../snapshot/TestMobSecureExportSnapshot.java | 7 +-
.../snapshot/TestSecureExportSnapshot.java | 7 +-
6 files changed, 93 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index e2086e9..e3ad951 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -29,7 +29,6 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
-import java.util.Random;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
@@ -110,9 +109,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
- static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
- static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";
-
+ static class Testing {
+ static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
+ static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
+ int failuresCountToInject = 0;
+ int injectedFailureCount = 0;
+ }
// Command line options and defaults.
static final class Options {
@@ -149,12 +151,10 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
NullWritable, NullWritable> {
+ private static final Log LOG = LogFactory.getLog(ExportMapper.class);
final static int REPORT_SIZE = 1 * 1024 * 1024;
final static int BUFFER_SIZE = 64 * 1024;
- private boolean testFailures;
- private Random random;
-
private boolean verifyChecksum;
private String filesGroup;
private String filesUser;
@@ -169,9 +169,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
private Path inputArchive;
private Path inputRoot;
+ private static Testing testing = new Testing();
+
@Override
public void setup(Context context) throws IOException {
Configuration conf = context.getConfiguration();
+
Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
@@ -186,8 +189,6 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
- testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
-
try {
srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
@@ -210,6 +211,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
for (Counter c : Counter.values()) {
context.getCounter(c).increment(0);
}
+ if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
+ testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
+ // Get number of times we have already injected failure based on attempt number of this
+ // task.
+ testing.injectedFailureCount = context.getTaskAttemptID().getId();
+ }
}
@Override
@@ -251,35 +258,23 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
return new Path(outputArchive, path);
}
- /*
- * Used by TestExportSnapshot to simulate a failure
+ /**
+ * Used by TestExportSnapshot to test for retries when failures happen.
+ * Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
*/
private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
throws IOException {
- if (testFailures) {
- if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
- if (random == null) {
- random = new Random();
- }
-
- // FLAKY-TEST-WARN: lower is better, we can get some runs without the
- // retry, but at least we reduce the number of test failures due to
- // this test exception from the same map task.
- if (random.nextFloat() < 0.03) {
- throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
- + " time=" + System.currentTimeMillis());
- }
- } else {
- context.getCounter(Counter.COPY_FAILED).increment(1);
- throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
- }
- }
+ if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
+ if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
+ testing.injectedFailureCount++;
+ context.getCounter(Counter.COPY_FAILED).increment(1);
+ LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
+ throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
+ testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
}
private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
final Path outputPath) throws IOException {
- injectTestFailure(context, inputInfo);
-
// Get the file information
FileStatus inputStat = getSourceFileStatus(context, inputInfo);
@@ -318,6 +313,7 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
}
} finally {
in.close();
+ injectTestFailure(context, inputInfo);
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index 1beb518..52412d9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.snapshot;
import static org.apache.hadoop.util.ToolRunner.run;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
import java.io.IOException;
import java.net.URI;
@@ -44,12 +45,10 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.SnapshotDescription;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.util.ToolRunner;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
@@ -57,6 +56,7 @@ import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
import org.junit.rules.TestRule;
/**
@@ -72,6 +72,9 @@ public class TestExportSnapshot {
protected final static byte[] FAMILY = Bytes.toBytes("cf");
+ @Rule
+ public final TestName testName = new TestName();
+
protected TableName tableName;
private byte[] emptySnapshotName;
private byte[] snapshotName;
@@ -85,17 +88,27 @@ public class TestExportSnapshot {
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
conf.setBoolean("hbase.master.enabletable.roundrobin", true);
conf.setInt("mapreduce.map.maxattempts", 10);
+ // If a single node has enough failures (default 3), resource manager will blacklist it.
+ // With only 2 nodes and tests injecting faults, we don't want that.
+ conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
}
@BeforeClass
public static void setUpBeforeClass() throws Exception {
setUpBaseConf(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(3);
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
+ TEST_UTIL.startMiniZKCluster();
+ TEST_UTIL.startMiniMapReduceCluster();
+ TEST_UTIL.startMiniHBaseCluster(1, 3);
}
@AfterClass
public static void tearDownAfterClass() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
+ TEST_UTIL.shutdownMiniHBaseCluster();
+ TEST_UTIL.shutdownMiniMapReduceCluster();
+ TEST_UTIL.shutdownMiniZKCluster();
}
/**
@@ -105,10 +118,9 @@ public class TestExportSnapshot {
public void setUp() throws Exception {
this.admin = TEST_UTIL.getAdmin();
- long tid = System.currentTimeMillis();
- tableName = TableName.valueOf("testtb-" + tid);
- snapshotName = Bytes.toBytes("snaptb0-" + tid);
- emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + tid);
+ tableName = TableName.valueOf("testtb-" + testName.getMethodName());
+ snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
+ emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
// create Table
createTable();
@@ -191,16 +203,16 @@ public class TestExportSnapshot {
Path copyDir, boolean overwrite) throws Exception {
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
- overwrite, getBypassRegionPredicate());
+ overwrite, getBypassRegionPredicate(), true);
}
/**
- * Test ExportSnapshot
+ * Creates destination directory, runs ExportSnapshot() tool, and runs some verifications.
*/
protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
final byte[] snapshotName, final byte[] targetName, final int filesExpected,
final Path sourceDir, Path copyDir, final boolean overwrite,
- final RegionPredicate bypassregionPredicate) throws Exception {
+ final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
URI hdfsUri = FileSystem.get(conf).getUri();
FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
copyDir = copyDir.makeQualified(fs);
@@ -218,7 +230,12 @@ public class TestExportSnapshot {
// Export Snapshot
int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
- assertEquals(0, res);
+ assertEquals(success ? 0 : 1, res);
+ if (!success) {
+ final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
+ assertFalse(fs.exists(new Path(copyDir, targetDir)));
+ return;
+ }
// Verify File-System state
FileStatus[] rootFiles = fs.listStatus(copyDir);
@@ -242,42 +259,35 @@ public class TestExportSnapshot {
}
/**
- * Check that ExportSnapshot will return a failure if something fails.
+ * Check that ExportSnapshot will succeed if something fails but the retry succeed.
*/
@Test
- public void testExportFailure() throws Exception {
- assertEquals(1, runExportAndInjectFailures(snapshotName, false));
+ public void testExportRetry() throws Exception {
+ Path copyDir = getLocalDestinationDir();
+ FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
+ copyDir = copyDir.makeQualified(fs);
+ Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+ conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+ conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
+ conf.setInt("mapreduce.map.maxattempts", 3);
+ testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+ TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
}
/**
- * Check that ExportSnapshot will succede if something fails but the retry succede.
+ * Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
*/
@Test
- public void testExportRetry() throws Exception {
- assertEquals(0, runExportAndInjectFailures(snapshotName, true));
- }
-
- /*
- * Execute the ExportSnapshot job injecting failures
- */
- private int runExportAndInjectFailures(final byte[] snapshotName, boolean retry)
- throws Exception {
+ public void testExportFailure() throws Exception {
Path copyDir = getLocalDestinationDir();
- URI hdfsUri = FileSystem.get(TEST_UTIL.getConfiguration()).getUri();
FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
copyDir = copyDir.makeQualified(fs);
-
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
- conf.setBoolean(ExportSnapshot.CONF_TEST_FAILURE, true);
- conf.setBoolean(ExportSnapshot.CONF_TEST_RETRY, retry);
- if (!retry) {
- conf.setInt("mapreduce.map.maxattempts", 3);
- }
- // Export Snapshot
- Path sourceDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
- String[] args = new String[] { "--snapshot", Bytes.toString(snapshotName),
- "--copy-from", sourceDir.toString(), "--copy-to", copyDir.toString() };
- return ToolRunner.run(conf, new ExportSnapshot(), args);
+ conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
+ conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
+ conf.setInt("mapreduce.map.maxattempts", 3);
+ testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
+ TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
}
/*
http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
index e2d7c11..cd5ff6c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotNoCluster.java
@@ -104,7 +104,7 @@ public class TestExportSnapshotNoCluster {
TableName tableName = builder.getTableDescriptor().getTableName();
TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
tableName, snapshotName, snapshotName, snapshotFilesCount,
- testDir, getDestinationDir(), false, null);
+ testDir, getDestinationDir(), false, null, true);
}
private Path getDestinationDir() {
http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
index c375b0a..55686b1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobExportSnapshot.java
@@ -45,7 +45,12 @@ public class TestMobExportSnapshot extends TestExportSnapshot {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
setUpBaseConf(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(3);
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
+ TEST_UTIL.startMiniZKCluster();
+ TEST_UTIL.startMiniMapReduceCluster();
+ TEST_UTIL.startMiniHBaseCluster(1, 3);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
index 8154995..c0f31d5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestMobSecureExportSnapshot.java
@@ -37,6 +37,9 @@ public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
setUpBaseConf(TEST_UTIL.getConfiguration());
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
// set the always on security provider
UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
@@ -45,7 +48,9 @@ public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
// setup configuration
SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(3);
+ TEST_UTIL.startMiniZKCluster();
+ TEST_UTIL.startMiniMapReduceCluster();
+ TEST_UTIL.startMiniHBaseCluster(1, 3);
// Wait for the ACL table to become available
TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
http://git-wip-us.apache.org/repos/asf/hbase/blob/da5fb27e/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
index f335e4e..4d35b71 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSecureExportSnapshot.java
@@ -42,6 +42,9 @@ public class TestSecureExportSnapshot extends TestExportSnapshot {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
setUpBaseConf(TEST_UTIL.getConfiguration());
+ // Setup separate test-data directory for MR cluster and set corresponding configurations.
+ // Otherwise, different test classes running MR cluster can step on each other.
+ TEST_UTIL.getDataTestDir();
// set the always on security provider
UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
@@ -50,7 +53,9 @@ public class TestSecureExportSnapshot extends TestExportSnapshot {
// setup configuration
SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
- TEST_UTIL.startMiniCluster(3);
+ TEST_UTIL.startMiniZKCluster();
+ TEST_UTIL.startMiniMapReduceCluster();
+ TEST_UTIL.startMiniHBaseCluster(1, 3);
// Wait for the ACL table to become available
TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);