You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2012/01/13 02:52:04 UTC

svn commit: r1230856 - in /lucene/dev/branches/solrcloud/solr/core/src: java/org/apache/solr/update/UpdateLog.java test/org/apache/solr/search/TestRecovery.java

Author: yonik
Date: Fri Jan 13 01:52:04 2012
New Revision: 1230856

URL: http://svn.apache.org/viewvc?rev=1230856&view=rev
Log:
catch throwables to handle assertions, handle recovering from a truncated log file

Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java?rev=1230856&r1=1230855&r2=1230856&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/UpdateLog.java Fri Jan 13 01:52:04 2012
@@ -792,7 +792,7 @@ public class UpdateLog implements Plugin
     public void run() {
       try {
 
-        uhandler.core.log.warn("Starting log replay " + tlogReader);
+        uhandler.core.log.warn("Starting log replay " + translog + " active="+activeLog + "starting pos=" + recoveryInfo.positionOfStart);
 
         tlogReader = translog.getReader(recoveryInfo.positionOfStart);
 
@@ -819,6 +819,7 @@ public class UpdateLog implements Plugin
 
           try {
             if (testing_logReplayHook != null) testing_logReplayHook.run();
+            o = null;
             o = tlogReader.next();
             if (o == null && activeLog) {
               if (!finishing) {
@@ -845,6 +846,8 @@ public class UpdateLog implements Plugin
             SolrException.log(log,e);
           } catch (IOException e) {
             SolrException.log(log,e);
+          } catch (Throwable e) {
+            SolrException.log(log,e);
           }
 
           if (o == null) break;
@@ -917,7 +920,7 @@ public class UpdateLog implements Plugin
             recoveryInfo.errors++;
             log.warn("Unexpected log entry or corrupt log.  Entry=" + o, cl);
             // would be caused by a corrupt transaction log
-          } catch (Exception ex) {
+          } catch (Throwable ex) {
             recoveryInfo.errors++;
             log.warn("Exception replaying log", ex);
             // something wrong with the request?
@@ -952,7 +955,7 @@ public class UpdateLog implements Plugin
         tlogReader.close();
         translog.decref();
 
-      } catch (Exception e) {
+      } catch (Throwable e) {
         recoveryInfo.errors++;
         SolrException.log(log,e);
       } finally {

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java?rev=1230856&r1=1230855&r2=1230856&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/search/TestRecovery.java Fri Jan 13 01:52:04 2012
@@ -40,7 +40,8 @@ import static org.apache.solr.update.pro
 
 public class TestRecovery extends SolrTestCaseJ4 {
   private static String SEEN_LEADER_VAL="true"; // value that means we've seen the leader and have version info (i.e. we are a non-leader replica)
-
+  private static int timeout=60;  // acquire timeout in seconds.  change this to a huge number when debugging to prevent threads from advancing.
+  
   @BeforeClass
   public static void beforeClass() throws Exception {
     initCore("solrconfig-tlog.xml","schema12.xml");
@@ -58,7 +59,7 @@ public class TestRecovery extends SolrTe
         @Override
         public void run() {
           try {
-            logReplay.acquire();
+            assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
           } catch (Exception e) {
             throw new RuntimeException(e);
           }
@@ -103,7 +104,7 @@ public class TestRecovery extends SolrTe
       assertJQ(req("qt","/get", "getVersions",""+versions.size()),"/versions==" + versions);
 
       // wait until recovery has finished
-      assertTrue(logReplayFinish.tryAcquire(60, TimeUnit.SECONDS));
+      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
 
       assertJQ(req("q","*:*") ,"/response/numFound==2");
 
@@ -123,7 +124,7 @@ public class TestRecovery extends SolrTe
       // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
 
       // wait until recovery has finished
-      assertTrue(logReplayFinish.tryAcquire(60, TimeUnit.SECONDS));
+      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
       assertJQ(req("q","*:*") ,"/response/numFound==4");
       assertJQ(req("q","id:2") ,"/response/numFound==0");
 
@@ -159,7 +160,7 @@ public class TestRecovery extends SolrTe
       @Override
       public void run() {
         try {
-          logReplay.acquire();
+          assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
         } catch (Exception e) {
           throw new RuntimeException(e);
         }
@@ -353,7 +354,7 @@ public class TestRecovery extends SolrTe
         @Override
         public void run() {
           try {
-            logReplay.acquire();
+            assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
           } catch (Exception e) {
             throw new RuntimeException(e);
           }
@@ -416,7 +417,7 @@ public class TestRecovery extends SolrTe
       assertJQ(req("qt","/get", "getVersions",""+maxReq), "/versions==" + versions.subList(0,Math.min(maxReq,start)));
 
       logReplay.release(1000);
-      logReplayFinish.acquire();  // wait until replay has finished
+      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
 
       assertJQ(req("qt","/get", "getVersions",""+maxReq), "/versions==" + versions.subList(0,Math.min(maxReq,start)));
 
@@ -429,7 +430,7 @@ public class TestRecovery extends SolrTe
       assertEquals(1, UpdateLog.getLogList(logDir).length);
 
       //
-      // test that a corrupt tlog file doesn't stop us
+      // test that a corrupt tlog file doesn't stop us from coming up, or seeing versions before that tlog file.
       //
       addDocs(1, start, new LinkedList<Long>()); // don't add this to the versions list because we are going to lose it...
       h.close();
@@ -445,9 +446,67 @@ public class TestRecovery extends SolrTe
       assertJQ(req("qt", "/get", "getVersions", "" + maxReq), "/versions==" + versions.subList(0, Math.min(maxReq, start)));
       resetExceptionIgnores();
 
+    } finally {
+      DirectUpdateHandler2.commitOnClose = true;
+      UpdateLog.testing_logReplayHook = null;
+      UpdateLog.testing_logReplayFinishHook = null;
+    }
+  }
+
+  //
+  // test that a partially written last tlog entry (that will cause problems for both reverse reading and for
+  // log replay) doesn't stop us from coming up, and from recovering the documents that were not cut off.
+  //
+  @Test
+  public void testTruncatedLog() throws Exception {
+    try {
+      DirectUpdateHandler2.commitOnClose = false;
+      final Semaphore logReplay = new Semaphore(0);
+      final Semaphore logReplayFinish = new Semaphore(0);
+
+      UpdateLog.testing_logReplayHook = new Runnable() {
+        @Override
+        public void run() {
+          try {
+            assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+
+      UpdateLog.testing_logReplayFinishHook = new Runnable() {
+        @Override
+        public void run() {
+          logReplayFinish.release();
+        }
+      };
 
+      File logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();
 
+      clearIndex();
+      assertU(commit());
 
+      assertU(adoc("id","1"));
+      assertU(adoc("id","2"));
+      assertU(adoc("id","3"));
+
+      h.close();
+      String[] files = UpdateLog.getLogList(logDir);
+      Arrays.sort(files);
+      RandomAccessFile raf = new RandomAccessFile(new File(logDir, files[files.length-1]), "rw");
+      raf.seek(raf.length());  // seek to end
+      raf.writeLong(0xffffffffffffffffL);
+      raf.writeChars("This should be appended to a good log file, representing a bad partially written record.");
+      raf.close();
+
+      logReplay.release(1000);
+      logReplayFinish.drainPermits();
+      ignoreException("OutOfBoundsException");  // this is what the corrupted log currently produces... subject to change.
+      createCore();
+      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
+      resetExceptionIgnores();
+      assertJQ(req("q","*:*") ,"/response/numFound==3");
 
     } finally {
       DirectUpdateHandler2.commitOnClose = true;