You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ct...@apache.org on 2021/02/25 18:35:15 UTC
[accumulo] branch main updated: Fix #1791 - Fix SuspendedTabletsIT
(#1888)
This is an automated email from the ASF dual-hosted git repository.
ctubbsii pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 05abf9f Fix #1791 - Fix SuspendedTabletsIT (#1888)
05abf9f is described below
commit 05abf9fefbb9de46f7d27b6f93d1a6a839dc6963
Author: Dom G <47...@users.noreply.github.com>
AuthorDate: Thu Feb 25 13:35:09 2021 -0500
Fix #1791 - Fix SuspendedTabletsIT (#1888)
* ensure metadata table tablets are on a single server
* removed server hosting metadata tablets from list to shutdown
* pre-split tables at creation time instead of immediately after
---
.../accumulo/test/manager/SuspendedTabletsIT.java | 71 +++++++++++++---------
1 file changed, 42 insertions(+), 29 deletions(-)
diff --git a/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java b/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java
index 92f6401..770648e 100644
--- a/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java
@@ -19,10 +19,8 @@
package org.apache.accumulo.test.manager;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
-import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.security.SecureRandom;
@@ -45,8 +43,10 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
import org.apache.accumulo.core.clientImpl.ClientContext;
import org.apache.accumulo.core.clientImpl.ManagerClient;
+import org.apache.accumulo.core.clientImpl.TabletLocator;
import org.apache.accumulo.core.conf.ClientProperty;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.Range;
@@ -54,6 +54,7 @@ import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.metadata.TServerInstance;
import org.apache.accumulo.core.metadata.TabletLocationState;
+import org.apache.accumulo.core.spi.balancer.HostRegexTableLoadBalancer;
import org.apache.accumulo.core.util.HostAndPort;
import org.apache.accumulo.minicluster.ServerType;
import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
@@ -77,7 +78,7 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
private static ExecutorService THREAD_POOL;
public static final int TSERVERS = 3;
- public static final long SUSPEND_DURATION = 20;
+ public static final long SUSPEND_DURATION = 80;
public static final int TABLETS = 30;
@Override
@@ -91,6 +92,11 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
cfg.setClientProperty(ClientProperty.INSTANCE_ZOOKEEPERS_TIMEOUT, "5s");
cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "5s");
cfg.setNumTservers(TSERVERS);
+ // config custom balancer to keep all metadata on one server
+ cfg.setProperty(HostRegexTableLoadBalancer.HOST_BALANCER_PREFIX + MetadataTable.NAME, "*");
+ cfg.setProperty(HostRegexTableLoadBalancer.HOST_BALANCER_OOB_CHECK_KEY, "7s");
+ cfg.setProperty(Property.TABLE_LOAD_BALANCER.getKey(),
+ HostRegexTableLoadBalancer.class.getName());
}
@Test
@@ -115,13 +121,30 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
// Run the test body. When we get to the point where we need tservers to go away, stop them via
// a clean shutdown.
suspensionTestBody((ctx, locs, count) -> {
- Set<TServerInstance> tserversSet = new HashSet<>();
+ Set<TServerInstance> tserverSet = new HashSet<>();
+ Set<TServerInstance> metadataServerSet = new HashSet<>();
+
+ TabletLocator tl = TabletLocator.getLocator(ctx, MetadataTable.ID);
for (TabletLocationState tls : locs.locationStates.values()) {
if (tls.current != null) {
- tserversSet.add(tls.current);
+ // add to set of all servers
+ tserverSet.add(tls.current);
+
+ // get server that the current tablets metadata is on
+ TabletLocator.TabletLocation tab =
+ tl.locateTablet(ctx, tls.extent.toMetaRow(), false, false);
+ // add it to the set of servers with metadata
+ metadataServerSet
+ .add(new TServerInstance(tab.tablet_location, Long.valueOf(tab.tablet_session, 16)));
}
}
- List<TServerInstance> tserversList = new ArrayList<>(tserversSet);
+
+ // remove servers with metadata on them from the list of servers to be shutdown
+ tserverSet.removeAll(metadataServerSet);
+
+ assertEquals("Expecting two tServers in shutdown-list", 2, tserverSet.size());
+
+ List<TServerInstance> tserversList = new ArrayList<>(tserverSet);
Collections.shuffle(tserversList, RANDOM);
for (int i1 = 0; i1 < count; ++i1) {
@@ -168,14 +191,13 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
String tableName = getUniqueNames(1)[0];
- // Create a table with a bunch of splits
- log.info("Creating table " + tableName);
- ctx.tableOperations().create(tableName);
SortedSet<Text> splitPoints = new TreeSet<>();
for (int i = 1; i < TABLETS; ++i) {
splitPoints.add(new Text("" + i));
}
- ctx.tableOperations().addSplits(tableName, splitPoints);
+ log.info("Creating table " + tableName);
+ NewTableConfiguration ntc = new NewTableConfiguration().withSplits(splitPoints);
+ ctx.tableOperations().create(tableName, ntc);
// Wait for all of the tablets to hosted ...
log.info("Waiting on hosting and balance");
@@ -188,8 +210,8 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
// ... and balanced.
ctx.instanceOperations().waitForBalance();
do {
- // Give at least another 5 seconds for migrations to finish up
- Thread.sleep(5000);
+ // Give at least another 15 seconds for migrations to finish up
+ Thread.sleep(15000);
ds = TabletLocations.retrieve(ctx, tableName);
} while (ds.hostedCount != TABLETS);
@@ -203,29 +225,23 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
log.info("Eliminating tablet servers");
serverStopper.eliminateTabletServers(ctx, beforeDeathState, 2);
- // Eventually some tablets will be suspended.
+ // All tablets should be either hosted or suspended.
log.info("Waiting on suspended tablets");
- ds = TabletLocations.retrieve(ctx, tableName);
- // Until we can scan the metadata table, the manager probably can't either, so won't have been
- // able to suspend the tablets.
- // So we note the time that we were first able to successfully scan the metadata table.
- long killTime = System.nanoTime();
- while (ds.suspended.keySet().size() != 2) {
+ do {
Thread.sleep(1000);
ds = TabletLocations.retrieve(ctx, tableName);
- }
+ } while (ds.suspended.keySet().size() != 2
+ && (ds.suspendedCount + ds.hostedCount) != TABLETS);
SetMultimap<HostAndPort,KeyExtent> deadTabletsByServer = ds.suspended;
- // By this point, all tablets should be either hosted or suspended. All suspended tablets
- // should
- // "belong" to the dead tablet servers, and should be in exactly the same place as before any
- // tserver death.
+ // All suspended tablets should "belong" to the dead tablet servers, and should be in exactly
+ // the same place as before any tserver death.
for (HostAndPort server : deadTabletsByServer.keySet()) {
- assertEquals(deadTabletsByServer.get(server), beforeDeathState.hosted.get(server));
+ // Comparing pre-death, hosted tablets to suspended tablets on a server
+ assertEquals(beforeDeathState.hosted.get(server), deadTabletsByServer.get(server));
}
assertEquals(TABLETS, ds.hostedCount + ds.suspendedCount);
-
// Restart the first tablet server, making sure it ends up on the same port
HostAndPort restartedServer = deadTabletsByServer.keySet().iterator().next();
log.info("Restarting " + restartedServer);
@@ -248,9 +264,6 @@ public class SuspendedTabletsIT extends ConfigurableMacBase {
Thread.sleep(1000);
ds = TabletLocations.retrieve(ctx, tableName);
}
-
- long recoverTime = System.nanoTime();
- assertTrue(recoverTime - killTime >= NANOSECONDS.convert(SUSPEND_DURATION, SECONDS));
}
}