You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bh...@apache.org on 2014/01/16 21:16:53 UTC

[04/12] git commit: ACCUMULO-2198 Concurrent randomwalk: add teardown, fix server balance check

ACCUMULO-2198 Concurrent randomwalk: add teardown, fix server balance check

The Concurrent randomwalk test had been using a test node property to remember the
last time servers were found unbalanced, but this property was not cleaned up
between runs. Therefore, if a new Concurrent test was started some time later, it
would pick up the stale timestamp property from the previous run. This commit moves
the tracking from a node property to test state, and removes the tracked values
during test teardown.

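In addition, the test logic would reset the timestamp every time servers were found
unbalanced, provided the 15-minute allowance hadn't expired. As a result, the
allowance could only expire if consecutive checks were more than 15 minutes apart.
This commit fixes that issue as well: the timestamp is now recorded only when servers
are first found unbalanced. The fix could lead to more (correct) reports of
unbalanced servers.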

Lastly, the test in 1.5.x fails only after three checks have found the servers
unbalanced. This commit backports that requirement to 1.4.x.

The timestamp reset and three-check fixes were added to 1.5.x in commit 0ee7e5a8.


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/cd4eac0d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/cd4eac0d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/cd4eac0d

Branch: refs/heads/1.6.0-SNAPSHOT
Commit: cd4eac0d7e2820321db9fc9cdfc8dc89f7dd53d2
Parents: 91be551
Author: Bill Havanki <bh...@cloudera.com>
Authored: Thu Jan 16 09:00:34 2014 -0500
Committer: Bill Havanki <bh...@cloudera.com>
Committed: Thu Jan 16 14:35:39 2014 -0500

----------------------------------------------------------------------
 .../accumulo/server/test/randomwalk/State.java  |  4 +++
 .../randomwalk/concurrent/CheckBalance.java     | 31 +++++++++++++++-----
 .../concurrent/ConcurrentFixture.java           |  5 +++-
 3 files changed, 31 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/cd4eac0d/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java
----------------------------------------------------------------------
diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java
index f9bd84e..5a53340 100644
--- a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java
+++ b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java
@@ -64,6 +64,10 @@ public class State {
   public void set(String key, Object value) {
     stateMap.put(key, value);
   }
+
+  public void remove(String key) {
+    stateMap.remove(key);
+  }
   
   public Object get(String key) {
     if (stateMap.containsKey(key) == false) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/cd4eac0d/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java
----------------------------------------------------------------------
diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java
index beb8327..d00e2b4 100644
--- a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java
+++ b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java
@@ -33,13 +33,15 @@ import org.apache.accumulo.server.test.randomwalk.Test;
  */
 public class CheckBalance extends Test {
   
-  private static final String LAST_UNBALANCED_TIME = "lastUnbalancedTime";
+  static final String LAST_UNBALANCED_TIME = "lastUnbalancedTime";
+  static final String UNBALANCED_COUNT = "unbalancedCount";
 
   /* (non-Javadoc)
    * @see org.apache.accumulo.server.test.randomwalk.Node#visit(org.apache.accumulo.server.test.randomwalk.State, java.util.Properties)
    */
   @Override
   public void visit(State state, Properties props) throws Exception {
+    log.debug("checking balance");
     Map<String,Long> counts = new HashMap<String,Long>();
     Scanner scanner = state.getConnector().createScanner(Constants.METADATA_TABLE_NAME, Constants.NO_AUTHS);
     scanner.fetchColumnFamily(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY);
@@ -57,25 +59,38 @@ public class CheckBalance extends Test {
     final double average = total / counts.size();
     
     // Check for even # of tablets on each node
+    double maxDifference = Math.max(1, average / 5);
+    String unbalancedLocation = null;
+    long lastCount = 0L;
     boolean balanced = true;
     for (Entry<String,Long> entry : counts.entrySet()) {
-      if (Math.abs(entry.getValue().longValue() - average) > Math.max(1, average / 5)) {
+      lastCount = entry.getValue().longValue();
+      if (Math.abs(lastCount - average) > maxDifference) {
         balanced = false;
+        unbalancedLocation = entry.getKey();
         break;
       }
     }
     
     // It is expected that the number of tablets will be uneven for short
     // periods of time. Don't complain unless we've seen it only unbalanced
-    // over a 15 minute period.
+    // over a 15 minute period and it's been at least three checks.
     if (!balanced) {
-      String last = props.getProperty(LAST_UNBALANCED_TIME);
-      if (last != null && System.currentTimeMillis() - Long.parseLong(last) > 15 * 60 * 1000) {
-        throw new Exception("servers are unbalanced!");
+      Long last = state.getLong(LAST_UNBALANCED_TIME);
+      if (last != null && System.currentTimeMillis() - last > 15 * 60 * 1000) {
+        Integer count = state.getInteger(UNBALANCED_COUNT);
+        if (count == null)
+          count = Integer.valueOf(0);
+        if (count > 3)
+          throw new Exception("servers are unbalanced! location " + unbalancedLocation + " count " + lastCount + " too far from average " + average);
+        count++;
+        state.set(UNBALANCED_COUNT, count);
+      } else if (last == null) {
+        state.set(LAST_UNBALANCED_TIME, System.currentTimeMillis());
       }
-      props.setProperty(LAST_UNBALANCED_TIME, Long.toString(System.currentTimeMillis()));
     } else {
-      props.remove(LAST_UNBALANCED_TIME);
+      state.remove(LAST_UNBALANCED_TIME);
+      state.remove(UNBALANCED_COUNT);
     }
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/cd4eac0d/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java
----------------------------------------------------------------------
diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java
index 62fac56..3606d57 100644
--- a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java
+++ b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java
@@ -31,6 +31,9 @@ public class ConcurrentFixture extends Fixture {
   public void setUp(State state) throws Exception {}
   
   @Override
-  public void tearDown(State state) throws Exception {}
+  public void tearDown(State state) throws Exception {
+    state.remove(CheckBalance.LAST_UNBALANCED_TIME);
+    state.remove(CheckBalance.UNBALANCED_COUNT);
+  }
   
 }