You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ps...@apache.org on 2020/01/30 12:09:31 UTC
[hbase] branch branch-2.2 updated: HBASE-23773
OutputSink.WriterThread exception gets stuck and repeated indefinietly
This is an automated email from the ASF dual-hosted git repository.
psomogyi pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.2 by this push:
new c6937c3 HBASE-23773 OutputSink.WriterThread exception gets stuck and repeated indefinietly
c6937c3 is described below
commit c6937c383d455e2ebf1353e65843b217770ae86f
Author: BukrosSzabolcs <bu...@gmail.com>
AuthorDate: Thu Jan 9 21:16:39 2020 +0100
HBASE-23773 OutputSink.WriterThread exception gets stuck and repeated indefinietly
Backport of HBASE-23601
clear exception after logged
try to restart writer threads if needed
---
.../RegionReplicaReplicationEndpoint.java | 1 +
.../org/apache/hadoop/hbase/wal/OutputSink.java | 20 +++-
.../org/apache/hadoop/hbase/wal/WALSplitter.java | 1 +
.../hadoop/hbase/wal/TestOutputSinkWriter.java | 126 +++++++++++++++++++++
4 files changed, 147 insertions(+), 1 deletion(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RegionReplicaReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RegionReplicaReplicationEndpoint.java
index d1498a8..c779119 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RegionReplicaReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RegionReplicaReplicationEndpoint.java
@@ -250,6 +250,7 @@ public class RegionReplicaReplicationEndpoint extends HBaseReplicationEndpoint {
} catch (IOException e) {
LOG.warn("Received IOException while trying to replicate"
+ StringUtils.stringifyException(e));
+ outputSink.restartWriterThreadsIfNeeded();
}
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/OutputSink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/OutputSink.java
index 729ea8b..35719e8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/OutputSink.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/OutputSink.java
@@ -91,6 +91,19 @@ public abstract class OutputSink {
}
}
+  /** Replaces each dead writer thread with a newly started thread of the same name. */
+  public synchronized void restartWriterThreadsIfNeeded() {
+    for (int idx = 0; idx < writerThreads.size(); idx++) {
+      WriterThread current = writerThreads.get(idx);
+      if (!current.isAlive()) {
+        String name = current.getName();
+        LOG.debug("Replacing dead thread: " + name);
+        WriterThread replacement = new WriterThread(controller, entryBuffers, this, name);
+        replacement.start();
+        writerThreads.set(idx, replacement);
+      }
+    }
+  }
/**
* Update region's maximum edit log SeqNum.
*/
@@ -188,7 +201,12 @@ public abstract class OutputSink {
WriterThread(WALSplitter.PipelineController controller, EntryBuffers entryBuffers,
OutputSink sink, int i) {
- super(Thread.currentThread().getName() + "-Writer-" + i);
+ this(controller, entryBuffers, sink, Thread.currentThread().getName() + "-Writer-" + i);
+ }
+
+ WriterThread(WALSplitter.PipelineController controller, EntryBuffers entryBuffers,
+ OutputSink sink, String threadName) {
+ super(threadName);
this.controller = controller;
this.entryBuffers = entryBuffers;
outputSink = sink;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
index 1663aaa..c981a26 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
@@ -472,6 +472,7 @@ public class WALSplitter {
if (thrown == null) {
return;
}
+ this.thrown.set(null);
if (thrown instanceof IOException) {
throw new IOException(thrown);
} else {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestOutputSinkWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestOutputSinkWriter.java
new file mode 100644
index 0000000..5249835
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestOutputSinkWriter.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.wal;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.Assert;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Tests one-time reporting of writer thread errors and restarting of dead writer threads.
+ */
+@Category({ RegionServerTests.class, MediumTests.class })
+public class TestOutputSinkWriter {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestOutputSinkWriter.class);
+
+  @Test
+  public void testExeptionHandling() throws IOException, InterruptedException {
+    WALSplitter.PipelineController controller = new WALSplitter.PipelineController();
+    BrokenEntryBuffers entryBuffers = new BrokenEntryBuffers(controller, 2000);
+    OutputSink sink = new OutputSink(controller, entryBuffers, 1) {
+      @Override
+      public List<Path> finishWritingAndClose() throws IOException {
+        return null;
+      }
+
+      @Override
+      public Map<byte[], Long> getOutputCounts() {
+        return null;
+      }
+
+      @Override
+      public int getNumberOfRecoveredRegions() {
+        return 0;
+      }
+
+      @Override
+      public void append(WALSplitter.RegionEntryBuffer buffer) throws IOException {
+      }
+
+      @Override
+      public boolean keepRegionEvent(WAL.Entry entry) {
+        return false;
+      }
+    };
+
+    // Start the writer thread and give it time to throw the exception.
+    sink.startWriterThreads();
+    Thread.sleep(1000L);
+
+    // Make sure the exception is stored.
+    try {
+      controller.checkForErrors();
+      Assert.fail("checkForErrors() should rethrow the writer thread's exception");
+    } catch (RuntimeException expected) {
+      // Expected: the stored exception was reported.
+    }
+
+    sink.restartWriterThreadsIfNeeded();
+
+    // The check above must have cleared the stored exception, so this call must not throw.
+    controller.checkForErrors();
+
+    // Prepare another exception and wait for the restarted writer to throw it.
+    entryBuffers.setThrowError(true);
+    Thread.sleep(1000L);
+
+    // Make sure the new exception is stored.
+    try {
+      controller.checkForErrors();
+      Assert.fail("checkForErrors() should rethrow the restarted writer's exception");
+    } catch (RuntimeException expected) {
+      // Expected: the second exception was reported.
+    }
+  }
+
+  /** {@link EntryBuffers} whose {@code getChunkToWrite()} throws once each time it is armed. */
+  static class BrokenEntryBuffers extends EntryBuffers {
+    // Guarded by this: written by the test thread, read and reset by the writer thread.
+    boolean throwError = true;
+
+    public BrokenEntryBuffers(WALSplitter.PipelineController controller, long maxHeapUsage) {
+      super(controller, maxHeapUsage);
+    }
+
+    @Override
+    synchronized WALSplitter.RegionEntryBuffer getChunkToWrite() {
+      // This just emulates something going wrong within the writer.
+      if (throwError) {
+        throwError = false;
+        throw new RuntimeException("testing");
+      }
+      return null;
+    }
+
+    public synchronized void setThrowError(boolean newValue) {
+      throwError = newValue;
+    }
+  }
+}