You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2015/01/22 14:00:17 UTC

svn commit: r1653838 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/CHANGES.txt solr/core/ solr/core/src/java/org/apache/solr/handler/SnapPuller.java solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java

Author: shalin
Date: Thu Jan 22 13:00:17 2015
New Revision: 1653838

URL: http://svn.apache.org/r1653838
Log:
SOLR-6640: Close searchers before rollback and recovery to avoid index corruption

Added:
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java
      - copied, changed from r1653281, lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/SnapPuller.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1653838&r1=1653837&r2=1653838&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Thu Jan 22 13:00:17 2015
@@ -482,6 +482,9 @@ Bug Fixes
 * SOLR-7011: Delete collection returns before collection is actually removed.
   (Christine Poerschke via shalin)
 
+* SOLR-6640: Close searchers before rollback and recovery to avoid index corruption.
+  (Robert Muir, Varun Thacker, shalin)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/SnapPuller.java?rev=1653838&r1=1653837&r2=1653838&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/SnapPuller.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/SnapPuller.java Thu Jan 22 13:00:17 2015
@@ -56,10 +56,12 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -78,6 +80,8 @@ import org.apache.commons.io.IOUtils;
 import org.apache.http.client.HttpClient;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -405,13 +409,41 @@ public class SnapPuller {
         }
         
         if (!isFullCopyNeeded) {
-          // rollback - and do it before we download any files
-          // so we don't remove files we thought we didn't need
-          // to download later
-          solrCore.getUpdateHandler().getSolrCoreState()
-          .closeIndexWriter(core, true);
+          // a searcher might be using some flushed but not committed segments
+          // because of soft commits (which open a searcher on IW's data)
+          // so we need to close the existing searcher on the last commit
+          // and wait until we are able to clean up all unused lucene files
+          if (solrCore.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
+            solrCore.closeSearcher();
+          }
+
+          // rollback and reopen index writer and wait until all unused files
+          // are successfully deleted
+          solrCore.getUpdateHandler().newIndexWriter(true);
+          RefCounted<IndexWriter> writer = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(null);
+          try {
+            IndexWriter indexWriter = writer.get();
+            int c = 0;
+            indexWriter.deleteUnusedFiles();
+            while (hasUnusedFiles(indexDir, commit)) {
+              indexWriter.deleteUnusedFiles();
+              LOG.info("Sleeping for 1000ms to wait for unused lucene index files to be delete-able");
+              Thread.sleep(1000);
+              c++;
+              if (c >= 30)  {
+                LOG.warn("SnapPuller unable to cleanup unused lucene index files so we must do a full copy instead");
+                isFullCopyNeeded = true;
+                break;
+              }
+            }
+            if (c > 0)  {
+              LOG.info("SnapPuller slept for " + (c * 1000) + "ms for unused lucene index files to be delete-able");
+            }
+          } finally {
+            writer.decref();
+          }
+          solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
         }
-        
         boolean reloadCore = false;
         
         try {
@@ -542,6 +574,24 @@ public class SnapPuller {
     }
   }
 
+  /** Returns true if indexDir holds a file (other than the write lock) that commit does not reference. */
+  private boolean hasUnusedFiles(Directory indexDir, IndexCommit commit) throws IOException {
+    String segmentsFileName = commit.getSegmentsFileName();
+    SegmentInfos infos = SegmentInfos.readCommit(indexDir, segmentsFileName);
+    Set<String> currentFiles = new HashSet<>();
+    for (SegmentCommitInfo info : infos.asList()) {
+      // info.files() also covers per-commit files (live docs, field updates), unlike info.info.files()
+      currentFiles.addAll(info.files());
+    }
+    for (String file : indexDir.listAll()) {
+      if (!file.equals(segmentsFileName) && !file.equals(IndexWriter.WRITE_LOCK_NAME) && !currentFiles.contains(file)) {
+        LOG.info("Found unused file: " + file);
+        return true;
+      }
+    }
+    return false;
+  }
+
   private volatile Exception fsyncException;
 
   /**

Copied: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java (from r1653281, lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java?p2=lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java&p1=lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java&r1=1653281&r2=1653838&rev=1653838&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java Thu Jan 22 13:00:17 2015
@@ -20,6 +20,7 @@ package org.apache.solr.cloud;
 import java.io.File;
 import java.util.List;
 
+import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -29,6 +30,7 @@ import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
 // See SOLR-6640
+@SolrTestCaseJ4.SuppressSSL
 public class RecoveryAfterSoftCommitTest extends AbstractFullDistribZkTestBase {
 
   public RecoveryAfterSoftCommitTest() {