You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2014/09/29 15:36:47 UTC

[1/2] git commit: ACCUMULO-2480 make the tserver give up and die if opening the WAL experiences 5 errors in 10 seconds

Repository: accumulo
Updated Branches:
  refs/heads/master 159d1b99d -> 1dfe826ce


ACCUMULO-2480 make the tserver give up and die if opening the WAL experiences 5 errors in 10 seconds


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/72156b82
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/72156b82
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/72156b82

Branch: refs/heads/master
Commit: 72156b82ecc05aa6dc072ab8d5dce5f328b140c8
Parents: a697516
Author: Eric C. Newton <er...@gmail.com>
Authored: Mon Sep 29 09:36:11 2014 -0400
Committer: Eric C. Newton <er...@gmail.com>
Committed: Mon Sep 29 09:36:11 2014 -0400

----------------------------------------------------------------------
 .../impl/MiniAccumuloClusterImpl.java           |  4 ++
 .../apache/accumulo/tserver/log/DfsLogger.java  | 15 ++--
 .../tserver/log/TabletServerLogger.java         | 17 +++++
 .../accumulo/test/TabletServerGivesUpIT.java    | 73 ++++++++++++++++++++
 4 files changed, 101 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
----------------------------------------------------------------------
diff --git a/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java b/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
index c9031eb..1fb5901 100644
--- a/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
+++ b/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
@@ -778,4 +778,8 @@ public class MiniAccumuloClusterImpl implements AccumuloCluster {
     }
     return stats;
   }
+
+  public MiniDFSCluster getMiniDfs() {
+    return this.miniDFS;
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
index 6260ec7..8de2b25 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
@@ -16,12 +16,6 @@
  */
 package org.apache.accumulo.tserver.log;
 
-import static org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_FINISH;
-import static org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_START;
-import static org.apache.accumulo.tserver.logger.LogEvents.DEFINE_TABLET;
-import static org.apache.accumulo.tserver.logger.LogEvents.MANY_MUTATIONS;
-import static org.apache.accumulo.tserver.logger.LogEvents.OPEN;
-
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
@@ -41,6 +35,7 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.atomic.AtomicLong;
 
+import com.google.common.base.Joiner;
 import org.apache.accumulo.core.client.Durability;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
@@ -65,7 +60,11 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.log4j.Logger;
 
-import com.google.common.base.Joiner;
+import static org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_FINISH;
+import static org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_START;
+import static org.apache.accumulo.tserver.logger.LogEvents.DEFINE_TABLET;
+import static org.apache.accumulo.tserver.logger.LogEvents.MANY_MUTATIONS;
+import static org.apache.accumulo.tserver.logger.LogEvents.OPEN;
 
 /**
  * Wrap a connection to a logger.
@@ -130,7 +129,7 @@ public class DfsLogger {
   private static final LogFileValue EMPTY = new LogFileValue();
 
   private boolean closed = false;
-
+  
   private class LogSyncingTask implements Runnable {
 
     @Override

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
index 243b405..86ae596 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
@@ -26,11 +26,16 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
+import org.apache.accumulo.server.util.Halt;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
 import org.apache.accumulo.core.client.Durability;
 import org.apache.accumulo.core.data.KeyExtent;
 import org.apache.accumulo.core.data.Mutation;
@@ -86,6 +91,14 @@ public class TabletServerLogger {
 
   private final AtomicLong syncCounter;
   private final AtomicLong flushCounter;
+  
+  private final static int HALT_AFTER_ERROR_COUNT = 5;
+  private final Cache<Long, Object> walErrors;
+  {
+    // Die if we get 5 WAL creation errors in 10 seconds
+    walErrors = CacheBuilder.newBuilder().maximumSize(HALT_AFTER_ERROR_COUNT).expireAfterWrite(10, TimeUnit.SECONDS).build();
+  }
+
 
   static private abstract class TestCallWithWriteLock {
     abstract boolean test();
@@ -194,6 +207,10 @@ public class TabletServerLogger {
       logSetId.incrementAndGet();
       return;
     } catch (Exception t) {
+      walErrors.put(System.currentTimeMillis(), "");
+      if (walErrors.size() >= HALT_AFTER_ERROR_COUNT) {
+        Halt.halt("Experienced too many errors creating WALs, giving up");
+      }
       throw new RuntimeException(t);
     }
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java b/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
new file mode 100644
index 0000000..e2e5ac9
--- /dev/null
+++ b/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.test;
+
+import java.util.TreeSet;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacIT;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+// ACCUMULO-2480
+public class TabletServerGivesUpIT extends ConfigurableMacIT {
+  
+  @Override
+  public void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
+    cfg.useMiniDFS(true);
+    cfg.setNumTservers(1);
+    cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "5s");
+  }
+
+  @Test(timeout = 30 * 1000)
+  public void test() throws Exception {
+    final Connector conn = this.getConnector();
+    // Yes, there's a tabletserver
+    assertEquals(1, conn.instanceOperations().getTabletServers().size());
+    final String tableName = getUniqueNames(1)[0];
+    conn.tableOperations().create(tableName);
+    // Kill dfs
+    cluster.getMiniDfs().shutdown();
+    // ask the tserver to do something
+    final AtomicReference<Exception> ex = new AtomicReference<>(); 
+    Thread splitter = new Thread() {
+      public void run() {
+        try {
+          TreeSet<Text> splits = new TreeSet<>();
+          splits.add(new Text("X"));
+          conn.tableOperations().addSplits(tableName, splits);
+        } catch (Exception e) {
+          ex.set(e);
+        }
+      }
+    };
+    splitter.start();
+    // wait for the tserver to give up on writing to the WAL
+    while (conn.instanceOperations().getTabletServers().size() == 1) {
+      UtilWaitThread.sleep(1000);
+    }
+  }
+  
+}


[2/2] git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/accumulo

Posted by ec...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/accumulo


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/1dfe826c
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/1dfe826c
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/1dfe826c

Branch: refs/heads/master
Commit: 1dfe826ced4fc83aaefb727f889cd3b53dbd86a2
Parents: 72156b8 159d1b9
Author: Eric C. Newton <er...@gmail.com>
Authored: Mon Sep 29 09:36:33 2014 -0400
Committer: Eric C. Newton <er...@gmail.com>
Committed: Mon Sep 29 09:36:33 2014 -0400

----------------------------------------------------------------------
 .../accumulo/master/FateServiceHandler.java     | 156 +++++++++++++++++--
 .../test/BadDeleteMarkersCreatedIT.java         |  27 +++-
 .../test/ConfigurableMajorCompactionIT.java     |   7 +-
 .../accumulo/test/functional/ExamplesIT.java    | 118 ++++++--------
 .../accumulo/test/functional/LargeRowIT.java    |  89 ++++++-----
 .../accumulo/test/functional/MergeIT.java       |  11 +-
 .../accumulo/test/functional/PermissionsIT.java |   9 +-
 .../test/replication/CyclicReplicationIT.java   |  11 +-
 .../test/replication/ReplicationIT.java         | 148 +++++++++---------
 .../UnorderedWorkAssignerReplicationIT.java     |  33 ++--
 10 files changed, 396 insertions(+), 213 deletions(-)
----------------------------------------------------------------------



Re: [1/2] git commit: ACCUMULO-2480 make the tserver give up and die if opening the WAL experiences 5 errors in 10 seconds

Posted by Keith Turner <ke...@deenlo.com>.
On Mon, Sep 29, 2014 at 9:36 AM, <ec...@apache.org> wrote:

> Repository: accumulo
> Updated Branches:
>   refs/heads/master 159d1b99d -> 1dfe826ce
>
>
> ACCUMULO-2480 make the tserver give up and die if opening the WAL
> experiences 5 errors in 10 seconds
>
>
> Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
> Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/72156b82
> Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/72156b82
> Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/72156b82
>
> Branch: refs/heads/master
> Commit: 72156b82ecc05aa6dc072ab8d5dce5f328b140c8
> Parents: a697516
> Author: Eric C. Newton <er...@gmail.com>
> Authored: Mon Sep 29 09:36:11 2014 -0400
> Committer: Eric C. Newton <er...@gmail.com>
> Committed: Mon Sep 29 09:36:11 2014 -0400
>
> ----------------------------------------------------------------------
>  .../impl/MiniAccumuloClusterImpl.java           |  4 ++
>  .../apache/accumulo/tserver/log/DfsLogger.java  | 15 ++--
>  .../tserver/log/TabletServerLogger.java         | 17 +++++
>  .../accumulo/test/TabletServerGivesUpIT.java    | 73 ++++++++++++++++++++
>  4 files changed, 101 insertions(+), 8 deletions(-)
> ----------------------------------------------------------------------
>
>
>
> http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
> ----------------------------------------------------------------------
> diff --git
> a/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
> b/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
> index c9031eb..1fb5901 100644
> ---
> a/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
> +++
> b/minicluster/src/main/java/org/apache/accumulo/minicluster/impl/MiniAccumuloClusterImpl.java
> @@ -778,4 +778,8 @@ public class MiniAccumuloClusterImpl implements
> AccumuloCluster {
>      }
>      return stats;
>    }
> +
> +  public MiniDFSCluster getMiniDfs() {
> +    return this.miniDFS;
> +  }
>  }
>
>
> http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
> ----------------------------------------------------------------------
> diff --git
> a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
> b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
> index 6260ec7..8de2b25 100644
> ---
> a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
> +++
> b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/DfsLogger.java
> @@ -16,12 +16,6 @@
>   */
>  package org.apache.accumulo.tserver.log;
>
> -import static
> org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_FINISH;
> -import static
> org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_START;
> -import static org.apache.accumulo.tserver.logger.LogEvents.DEFINE_TABLET;
> -import static org.apache.accumulo.tserver.logger.LogEvents.MANY_MUTATIONS;
> -import static org.apache.accumulo.tserver.logger.LogEvents.OPEN;
> -
>  import java.io.DataInputStream;
>  import java.io.DataOutputStream;
>  import java.io.IOException;
> @@ -41,6 +35,7 @@ import java.util.concurrent.CountDownLatch;
>  import java.util.concurrent.LinkedBlockingQueue;
>  import java.util.concurrent.atomic.AtomicLong;
>
> +import com.google.common.base.Joiner;
>  import org.apache.accumulo.core.client.Durability;
>  import org.apache.accumulo.core.conf.AccumuloConfiguration;
>  import org.apache.accumulo.core.conf.Property;
> @@ -65,7 +60,11 @@ import org.apache.hadoop.fs.FSDataOutputStream;
>  import org.apache.hadoop.fs.Path;
>  import org.apache.log4j.Logger;
>
> -import com.google.common.base.Joiner;
> +import static
> org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_FINISH;
> +import static
> org.apache.accumulo.tserver.logger.LogEvents.COMPACTION_START;
> +import static org.apache.accumulo.tserver.logger.LogEvents.DEFINE_TABLET;
> +import static org.apache.accumulo.tserver.logger.LogEvents.MANY_MUTATIONS;
> +import static org.apache.accumulo.tserver.logger.LogEvents.OPEN;
>
>  /**
>   * Wrap a connection to a logger.
> @@ -130,7 +129,7 @@ public class DfsLogger {
>    private static final LogFileValue EMPTY = new LogFileValue();
>
>    private boolean closed = false;
> -
> +
>    private class LogSyncingTask implements Runnable {
>
>      @Override
>
>
> http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
> ----------------------------------------------------------------------
> diff --git
> a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
> b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
> index 243b405..86ae596 100644
> ---
> a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
> +++
> b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
> @@ -26,11 +26,16 @@ import java.util.List;
>  import java.util.Map;
>  import java.util.Map.Entry;
>  import java.util.Set;
> +import java.util.concurrent.TimeUnit;
>  import java.util.concurrent.atomic.AtomicInteger;
>  import java.util.concurrent.atomic.AtomicLong;
>  import java.util.concurrent.locks.ReadWriteLock;
>  import java.util.concurrent.locks.ReentrantReadWriteLock;
>
> +import org.apache.accumulo.server.util.Halt;
> +
> +import com.google.common.cache.Cache;
> +import com.google.common.cache.CacheBuilder;
>  import org.apache.accumulo.core.client.Durability;
>  import org.apache.accumulo.core.data.KeyExtent;
>  import org.apache.accumulo.core.data.Mutation;
> @@ -86,6 +91,14 @@ public class TabletServerLogger {
>
>    private final AtomicLong syncCounter;
>    private final AtomicLong flushCounter;
> +
> +  private final static int HALT_AFTER_ERROR_COUNT = 5;
> +  private final Cache<Long, Object> walErrors;
> +  {
> +    // Die if we get 5 WAL creation errors in 10 seconds
> +    walErrors =
> CacheBuilder.newBuilder().maximumSize(HALT_AFTER_ERROR_COUNT).expireAfterWrite(10,
> TimeUnit.SECONDS).build();
> +  }
> +
>
>    static private abstract class TestCallWithWriteLock {
>      abstract boolean test();
> @@ -194,6 +207,10 @@ public class TabletServerLogger {
>        logSetId.incrementAndGet();
>        return;
>      } catch (Exception t) {
> +      walErrors.put(System.currentTimeMillis(), "");
> +      if (walErrors.size() >= HALT_AFTER_ERROR_COUNT) {
>

This is a bit aggressive.  It could bring all tservers down if there is a
temporary namenode hiccup unrelated to the problem seen in ACCUMULO-2480.
Is there something more specific we could look for in the exception that
would indicate the filesystem is closed?


> +        Halt.halt("Experienced too many errors creating WALs, giving up");
> +      }
>        throw new RuntimeException(t);
>      }
>    }
>
>
> http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
> ----------------------------------------------------------------------
> diff --git
> a/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
> b/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
> new file mode 100644
> index 0000000..e2e5ac9
> --- /dev/null
> +++
> b/test/src/main/java/org/apache/accumulo/test/TabletServerGivesUpIT.java
> @@ -0,0 +1,73 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.accumulo.test;
> +
> +import java.util.TreeSet;
> +import java.util.concurrent.atomic.AtomicReference;
> +
> +import org.apache.accumulo.core.client.Connector;
> +import org.apache.accumulo.core.conf.Property;
> +import org.apache.accumulo.core.util.UtilWaitThread;
> +import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
> +import org.apache.accumulo.test.functional.ConfigurableMacIT;
> +import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.io.Text;
> +import org.junit.Test;
> +
> +import static org.junit.Assert.assertEquals;
> +import static org.junit.Assert.assertTrue;
> +
> +// ACCUMULO-2480
> +public class TabletServerGivesUpIT extends ConfigurableMacIT {
> +
> +  @Override
> +  public void configure(MiniAccumuloConfigImpl cfg, Configuration
> hadoopCoreSite) {
> +    cfg.useMiniDFS(true);
> +    cfg.setNumTservers(1);
> +    cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "5s");
> +  }
> +
> +  @Test(timeout = 30 * 1000)
> +  public void test() throws Exception {
> +    final Connector conn = this.getConnector();
> +    // Yes, there's a tabletserver
> +    assertEquals(1, conn.instanceOperations().getTabletServers().size());
> +    final String tableName = getUniqueNames(1)[0];
> +    conn.tableOperations().create(tableName);
> +    // Kill dfs
> +    cluster.getMiniDfs().shutdown();
> +    // ask the tserver to do something
> +    final AtomicReference<Exception> ex = new AtomicReference<>();
> +    Thread splitter = new Thread() {
> +      public void run() {
> +        try {
> +          TreeSet<Text> splits = new TreeSet<>();
> +          splits.add(new Text("X"));
> +          conn.tableOperations().addSplits(tableName, splits);
> +        } catch (Exception e) {
> +          ex.set(e);
> +        }
> +      }
> +    };
> +    splitter.start();
> +    // wait for the tserver to give up on writing to the WAL
> +    while (conn.instanceOperations().getTabletServers().size() == 1) {
> +      UtilWaitThread.sleep(1000);
> +    }
> +  }
> +
> +}
>
>

Fwd: [1/2] git commit: ACCUMULO-2480 make the tserver give up and die if opening the WAL experiences 5 errors in 10 seconds

Posted by Josh Elser <jo...@gmail.com>.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/72156b82/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
----------------------------------------------------------------------
diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
index 243b405..86ae596 100644
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
@@ -86,6 +91,14 @@ public class TabletServerLogger {

    private final AtomicLong syncCounter;
    private final AtomicLong flushCounter;
+
+  private final static int HALT_AFTER_ERROR_COUNT = 5;
+  private final Cache<Long, Object> walErrors;
+  {
+    // Die if we get 5 WAL creation errors in 10 seconds
+    walErrors = 
CacheBuilder.newBuilder().maximumSize(HALT_AFTER_ERROR_COUNT).expireAfterWrite(10, 
TimeUnit.SECONDS).build();
+  }
+

Why the extra pair of braces (an instance initializer block) instead of just
assigning the new cache to walErrors in its declaration?