You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/08/09 05:32:28 UTC

svn commit: r1371033 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java

Author: markrmiller
Date: Thu Aug  9 03:32:27 2012
New Revision: 1371033

URL: http://svn.apache.org/viewvc?rev=1371033&view=rev
Log:
Add the core name to some of the recovery log messages.
Move the getRecentUpdates call into the try block - if fetching the recent updates throws an exception, we should continue with no updates.
Move the cancelRecovery call inside the synchronized (recoveryLock) block that waits for a running recovery to finish.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java?rev=1371033&r1=1371032&r2=1371033&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java Thu Aug  9 03:32:27 2012
@@ -100,7 +100,7 @@ public class RecoveryStrategy extends Th
   private void recoveryFailed(final SolrCore core,
       final ZkController zkController, final String baseUrl,
       final String shardZkNodeName, final CoreDescriptor cd) throws KeeperException, InterruptedException {
-    SolrException.log(log, "Recovery failed - I give up.");
+    SolrException.log(log, "Recovery failed - I give up. Core:" + coreName);
     try {
       zkController.publish(cd, ZkStateReader.RECOVERY_FAILED);
     } finally {
@@ -115,7 +115,7 @@ public class RecoveryStrategy extends Th
     ZkCoreNodeProps leaderCNodeProps = new ZkCoreNodeProps(leaderprops);
     String leaderUrl = leaderCNodeProps.getCoreUrl();
     
-    log.info("Attempting to replicate from " + leaderUrl);
+    log.info("Attempting to replicate from " + leaderUrl + ". Core:" + coreName);
     
     // if we are the leader, either we are trying to recover faster
     // then our ephemeral timed out or we are the only node
@@ -205,7 +205,7 @@ public class RecoveryStrategy extends Th
       SolrQueryResponse rsp = new SolrQueryResponse();
       SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
 
-      log.info("Starting recovery process. recoveringAfterStartup=" + recoveringAfterStartup);
+      log.info("Starting recovery process.  Core:" + coreName + " - recoveringAfterStartup=" + recoveringAfterStartup);
 
       try {
         doRecovery(core);
@@ -233,7 +233,7 @@ public class RecoveryStrategy extends Th
     UpdateLog ulog;
     ulog = core.getUpdateHandler().getUpdateLog();
     if (ulog == null) {
-      SolrException.log(log, "No UpdateLog found - cannot recover");
+      SolrException.log(log, "No UpdateLog found - cannot recover. Core:" + coreName);
       recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
           core.getCoreDescriptor());
       return;
@@ -241,14 +241,17 @@ public class RecoveryStrategy extends Th
 
 
     List<Long> recentVersions;
-    UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
+    UpdateLog.RecentUpdates recentUpdates = null;
     try {
+      recentUpdates = ulog.getRecentUpdates();
       recentVersions = recentUpdates.getVersions(ulog.numRecordsToKeep);
     } catch (Throwable t) {
-      SolrException.log(log, "Corrupt tlog - ignoring", t);
+      SolrException.log(log, "Corrupt tlog - ignoring. Core:" + coreName, t);
       recentVersions = new ArrayList<Long>(0);
-    }finally {
-      recentUpdates.close();
+    } finally {
+      if (recentUpdates != null) {
+        recentUpdates.close();
+      }
     }
 
     List<Long> startingVersions = ulog.getStartingVersions();
@@ -282,7 +285,7 @@ public class RecoveryStrategy extends Th
         // last operation at the time of startup had the GAP flag set...
         // this means we were previously doing a full index replication
         // that probably didn't complete and buffering updates in the meantime.
-        log.info("Looks like a previous replication recovery did not complete - skipping peer sync");
+        log.info("Looks like a previous replication recovery did not complete - skipping peer sync. Core:" + coreName);
         firstTime = false;    // skip peersync
       }
     }
@@ -308,7 +311,7 @@ public class RecoveryStrategy extends Th
         // first thing we just try to sync
         if (firstTime) {
           firstTime = false; // only try sync the first time through the loop
-          log.info("Attempting to PeerSync from " + leaderUrl + " recoveringAfterStartup="+recoveringAfterStartup);
+          log.info("Attempting to PeerSync from " + leaderUrl + " Core:" + coreName + " - recoveringAfterStartup="+recoveringAfterStartup);
           // System.out.println("Attempting to PeerSync from " + leaderUrl
           // + " i am:" + zkController.getNodeName());
           PeerSync peerSync = new PeerSync(core,
@@ -319,7 +322,7 @@ public class RecoveryStrategy extends Th
             SolrQueryRequest req = new LocalSolrQueryRequest(core,
                 new ModifiableSolrParams());
             core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
-            log.info("PeerSync Recovery was successful - registering as Active");
+            log.info("PeerSync Recovery was successful - registering as Active. Core:" + coreName);
             // System.out
             // .println("Sync Recovery was successful - registering as Active "
             // + zkController.getNodeName());
@@ -348,11 +351,11 @@ public class RecoveryStrategy extends Th
             return;
           }
 
-          log.info("PeerSync Recovery was not successful - trying replication");
+          log.info("PeerSync Recovery was not successful - trying replication. Core:" + coreName);
         }
         //System.out.println("Sync Recovery was not successful - trying replication");
-        log.info("Starting Replication Recovery");
-        log.info("Begin buffering updates");
+        log.info("Starting Replication Recovery. Core:" + coreName);
+        log.info("Begin buffering updates. Core:" + coreName);
         ulog.bufferUpdates();
         replayed = false;
 
@@ -364,7 +367,7 @@ public class RecoveryStrategy extends Th
           replay(ulog);
           replayed = true;
 
-          log.info("Replication Recovery was successful - registering as Active");
+          log.info("Replication Recovery was successful - registering as Active. Core:" + coreName);
           // if there are pending recovery requests, don't advert as active
           zkController.publish(core.getCoreDescriptor(), ZkStateReader.ACTIVE);
           close = true;
@@ -387,7 +390,7 @@ public class RecoveryStrategy extends Th
         }
 
       } catch (Throwable t) {
-        log.error("Error while trying to recover.", t);
+        log.error("Error while trying to recover. Core:" + coreName, t);
       }
 
       if (!successfulRecovery) {
@@ -396,13 +399,13 @@ public class RecoveryStrategy extends Th
         // Or do a fall off retry...
         try {
 
-          log.error("Recovery failed - trying again...");
+          log.error("Recovery failed - trying again... Core:" + coreName);
           retries++;
           if (retries >= MAX_RETRIES) {
             if (retries == INTERRUPTED) {
 
             } else {
-              log.error("Recovery failed - max retries exceeded.");
+              log.error("Recovery failed - max retries exceeded. Core:" + coreName);
               recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
                   core.getCoreDescriptor());
             }
@@ -410,7 +413,7 @@ public class RecoveryStrategy extends Th
           }
 
         } catch (Exception e) {
-          log.error("", e);
+          log.error("Core:" + coreName, e);
         }
 
         try {
@@ -422,13 +425,13 @@ public class RecoveryStrategy extends Th
           }
         } catch (InterruptedException e) {
           Thread.currentThread().interrupt();
-          log.warn("Recovery was interrupted", e);
+          log.warn("Recovery was interrupted. Core:" + coreName, e);
           retries = INTERRUPTED;
         }
       }
 
     }
-    log.info("Finished recovery process");
+    log.info("Finished recovery process. Core:" + coreName);
 
   }
 
@@ -437,9 +440,9 @@ public class RecoveryStrategy extends Th
     Future<RecoveryInfo> future = ulog.applyBufferedUpdates();
     if (future == null) {
       // no replay needed\
-      log.info("No replay needed");
+      log.info("No replay needed. Core:" + coreName);
     } else {
-      log.info("Replaying buffered documents");
+      log.info("Replaying buffered documents. Core:" + coreName);
       // wait for replay
       future.get();
     }

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java?rev=1371033&r1=1371032&r2=1371033&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java Thu Aug  9 03:32:27 2012
@@ -198,8 +198,9 @@ public final class DefaultSolrCoreState 
       return;
     }
     
-    cancelRecovery();
     synchronized (recoveryLock) {
+      cancelRecovery();
+      
       while (recoveryRunning) {
         try {
           recoveryLock.wait(1000);