You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2012/01/13 02:52:04 UTC
svn commit: r1230856 - in /lucene/dev/branches/solrcloud/solr/core/src:
java/org/apache/solr/update/UpdateLog.java
test/org/apache/solr/search/TestRecovery.java
Author: yonik
Date: Fri Jan 13 01:52:04 2012
New Revision: 1230856
URL: http://svn.apache.org/viewvc?rev=1230856&view=rev
Log:
catch throwables to handle assertions, handle recovering from a truncated log file
Modified:
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java?rev=1230856&r1=1230855&r2=1230856&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java Fri Jan 13 01:52:04 2012
@@ -792,7 +792,7 @@ public class UpdateLog implements Plugin
public void run() {
try {
- uhandler.core.log.warn("Starting log replay " + tlogReader);
+ uhandler.core.log.warn("Starting log replay " + translog + " active="+activeLog + "starting pos=" + recoveryInfo.positionOfStart);
tlogReader = translog.getReader(recoveryInfo.positionOfStart);
@@ -819,6 +819,7 @@ public class UpdateLog implements Plugin
try {
if (testing_logReplayHook != null) testing_logReplayHook.run();
+ o = null;
o = tlogReader.next();
if (o == null && activeLog) {
if (!finishing) {
@@ -845,6 +846,8 @@ public class UpdateLog implements Plugin
SolrException.log(log,e);
} catch (IOException e) {
SolrException.log(log,e);
+ } catch (Throwable e) {
+ SolrException.log(log,e);
}
if (o == null) break;
@@ -917,7 +920,7 @@ public class UpdateLog implements Plugin
recoveryInfo.errors++;
log.warn("Unexpected log entry or corrupt log. Entry=" + o, cl);
// would be caused by a corrupt transaction log
- } catch (Exception ex) {
+ } catch (Throwable ex) {
recoveryInfo.errors++;
log.warn("Exception replaying log", ex);
// something wrong with the request?
@@ -952,7 +955,7 @@ public class UpdateLog implements Plugin
tlogReader.close();
translog.decref();
- } catch (Exception e) {
+ } catch (Throwable e) {
recoveryInfo.errors++;
SolrException.log(log,e);
} finally {
Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java?rev=1230856&r1=1230855&r2=1230856&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java Fri Jan 13 01:52:04 2012
@@ -40,7 +40,8 @@ import static org.apache.solr.update.pro
public class TestRecovery extends SolrTestCaseJ4 {
private static String SEEN_LEADER_VAL="true"; // value that means we've seen the leader and have version info (i.e. we are a non-leader replica)
-
+ private static int timeout=60; // acquire timeout in seconds. change this to a huge number when debugging to prevent threads from advancing.
+
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-tlog.xml","schema12.xml");
@@ -58,7 +59,7 @@ public class TestRecovery extends SolrTe
@Override
public void run() {
try {
- logReplay.acquire();
+ assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -103,7 +104,7 @@ public class TestRecovery extends SolrTe
assertJQ(req("qt","/get", "getVersions",""+versions.size()),"/versions==" + versions);
// wait until recovery has finished
- assertTrue(logReplayFinish.tryAcquire(60, TimeUnit.SECONDS));
+ assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
assertJQ(req("q","*:*") ,"/response/numFound==2");
@@ -123,7 +124,7 @@ public class TestRecovery extends SolrTe
// h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
// wait until recovery has finished
- assertTrue(logReplayFinish.tryAcquire(60, TimeUnit.SECONDS));
+ assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
assertJQ(req("q","*:*") ,"/response/numFound==4");
assertJQ(req("q","id:2") ,"/response/numFound==0");
@@ -159,7 +160,7 @@ public class TestRecovery extends SolrTe
@Override
public void run() {
try {
- logReplay.acquire();
+ assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -353,7 +354,7 @@ public class TestRecovery extends SolrTe
@Override
public void run() {
try {
- logReplay.acquire();
+ assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -416,7 +417,7 @@ public class TestRecovery extends SolrTe
assertJQ(req("qt","/get", "getVersions",""+maxReq), "/versions==" + versions.subList(0,Math.min(maxReq,start)));
logReplay.release(1000);
- logReplayFinish.acquire(); // wait until replay has finished
+ assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
assertJQ(req("qt","/get", "getVersions",""+maxReq), "/versions==" + versions.subList(0,Math.min(maxReq,start)));
@@ -429,7 +430,7 @@ public class TestRecovery extends SolrTe
assertEquals(1, UpdateLog.getLogList(logDir).length);
//
- // test that a corrupt tlog file doesn't stop us
+ // test that a corrupt tlog file doesn't stop us from coming up, or seeing versions before that tlog file.
//
addDocs(1, start, new LinkedList<Long>()); // don't add this to the versions list because we are going to lose it...
h.close();
@@ -445,9 +446,67 @@ public class TestRecovery extends SolrTe
assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start)));
resetExceptionIgnores();
+ } finally {
+ DirectUpdateHandler2.commitOnClose = true;
+ UpdateLog.testing_logReplayHook = null;
+ UpdateLog.testing_logReplayFinishHook = null;
+ }
+ }
+
+ //
+ // test that a partially written last tlog entry (that will cause problems for both reverse reading and for
+ // log replay) doesn't stop us from coming up, and from recovering the documents that were not cut off.
+ //
+ @Test
+ public void testTruncatedLog() throws Exception {
+ try {
+ DirectUpdateHandler2.commitOnClose = false;
+ final Semaphore logReplay = new Semaphore(0);
+ final Semaphore logReplayFinish = new Semaphore(0);
+
+ UpdateLog.testing_logReplayHook = new Runnable() {
+ @Override
+ public void run() {
+ try {
+ assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+
+ UpdateLog.testing_logReplayFinishHook = new Runnable() {
+ @Override
+ public void run() {
+ logReplayFinish.release();
+ }
+ };
+ File logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id","1"));
+ assertU(adoc("id","2"));
+ assertU(adoc("id","3"));
+
+ h.close();
+ String[] files = UpdateLog.getLogList(logDir);
+ Arrays.sort(files);
+ RandomAccessFile raf = new RandomAccessFile(new File(logDir, files[files.length-1]), "rw");
+ raf.seek(raf.length()); // seek to end
+ raf.writeLong(0xffffffffffffffffL);
+ raf.writeChars("This should be appended to a good log file, representing a bad partially written record.");
+ raf.close();
+
+ logReplay.release(1000);
+ logReplayFinish.drainPermits();
+ ignoreException("OutOfBoundsException"); // this is what the corrupted log currently produces... subject to change.
+ createCore();
+ assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
+ resetExceptionIgnores();
+ assertJQ(req("q","*:*") ,"/response/numFound==3");
} finally {
DirectUpdateHandler2.commitOnClose = true;