You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2016/08/13 11:09:08 UTC

lucene-solr:branch_6x: SOLR-9320: Improve logging

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 99785c0e0 -> 5fc35a65c


SOLR-9320: Improve logging


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5fc35a65
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5fc35a65
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5fc35a65

Branch: refs/heads/branch_6x
Commit: 5fc35a65c39b7fb4ca49675d3ef9cd7f9d6c0fa8
Parents: 99785c0
Author: Varun Thacker <va...@apache.org>
Authored: Sat Aug 13 16:20:51 2016 +0530
Committer: Varun Thacker <va...@apache.org>
Committed: Sat Aug 13 16:38:26 2016 +0530

----------------------------------------------------------------------
 .../org/apache/solr/cloud/DeleteNodeCmd.java    | 22 +++++++-----
 .../cloud/OverseerCollectionMessageHandler.java |  6 ++--
 .../org/apache/solr/cloud/ReplaceNodeCmd.java   | 35 ++++++++++----------
 3 files changed, 32 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5fc35a65/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
index cbcfa88..3e60090 100644
--- a/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/DeleteNodeCmd.java
@@ -20,6 +20,7 @@ package org.apache.solr.cloud;
 
 import java.lang.invoke.MethodHandles;
 import java.util.List;
+import java.util.Locale;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 
@@ -27,11 +28,13 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.Utils;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
+
 public class DeleteNodeCmd implements OverseerCollectionMessageHandler.Cmd {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -42,43 +45,44 @@ public class DeleteNodeCmd implements OverseerCollectionMessageHandler.Cmd {
   }
 
   @Override
-  public Object call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
+  public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
     ocmh.checkRequired(message, "node");
     String node = message.getStr("node");
     if (!state.liveNodesContain(node)) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Source Node: " + node + " is not live");
     }
     List<ZkNodeProps> sourceReplicas = ReplaceNodeCmd.getReplicasOfNode(node, state);
-    cleanupReplicas(results, state, sourceReplicas, ocmh);
-    return null;
+    cleanupReplicas(results, state, sourceReplicas, ocmh, node);
   }
 
   static void cleanupReplicas(NamedList results,
                               ClusterState clusterState,
                               List<ZkNodeProps> sourceReplicas,
-                              OverseerCollectionMessageHandler ocmh) throws InterruptedException {
+                              OverseerCollectionMessageHandler ocmh, String node) throws InterruptedException {
     CountDownLatch cleanupLatch = new CountDownLatch(sourceReplicas.size());
     for (ZkNodeProps sourceReplica : sourceReplicas) {
-      log.info("deleting replica from from node {} ", Utils.toJSONString(sourceReplica));
+      log.info("Deleting replica for collection={} shard={} on node={}", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), node);
       NamedList deleteResult = new NamedList();
       try {
         ocmh.deleteReplica(clusterState, sourceReplica.plus("parallel", "true"), deleteResult, () -> {
           cleanupLatch.countDown();
           if (deleteResult.get("failure") != null) {
             synchronized (results) {
-              results.add("failure", "could not delete because  " + deleteResult.get("failure") + "  " + Utils.toJSONString(sourceReplica));
+              results.add("failure", String.format(Locale.ROOT, "Failed to delete replica for collection=%s shard=%s" +
+                  " on node=%s", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), node));
             }
           }
         });
       } catch (KeeperException e) {
-        log.info("Error deleting ", e);
+        log.warn("Error deleting ", e);
         cleanupLatch.countDown();
       } catch (Exception e) {
+        log.warn("Error deleting ", e);
         cleanupLatch.countDown();
         throw e;
       }
     }
-    log.info("Waiting for deletes to complete");
+    log.debug("Waiting for delete node action to complete");
     cleanupLatch.await(5, TimeUnit.MINUTES);
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5fc35a65/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index 908d35c..0588446 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -316,7 +316,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
         default: {
           Cmd command = commandMap.get(action);
           if (command != null) {
-            command.call(zkStateReader.getClusterState(),message, results);
+            command.call(zkStateReader.getClusterState(), message, results);
           } else {
             throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown operation:"
                 + operation);
@@ -617,7 +617,6 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
   @SuppressWarnings("unchecked")
   void deleteReplica(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete)
       throws KeeperException, InterruptedException {
-    log.info("deleteReplica() : {}", Utils.toJSONString(message));
     checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP);
     String collectionName = message.getStr(COLLECTION_PROP);
     String shard = message.getStr(SHARD_ID_PROP);
@@ -664,7 +663,6 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
     params.set(CoreAdminParams.DELETE_DATA_DIR, message.getBool(CoreAdminParams.DELETE_DATA_DIR, true));
 
     sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap.get());
-    AtomicReference<Exception> exp = new AtomicReference<>();
 
     Callable<Boolean> callable = () -> {
       try {
@@ -2809,7 +2807,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
 
   interface Cmd {
 
-    Object call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception;
+    void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception;
 
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5fc35a65/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java
index 0cfd089..aad9cc7 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ReplaceNodeCmd.java
@@ -21,6 +21,7 @@ package org.apache.solr.cloud;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
@@ -35,7 +36,6 @@ import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.Utils;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -53,7 +53,7 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
   }
 
   @Override
-  public Object call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
+  public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
     ZkStateReader zkStateReader = ocmh.zkStateReader;
     ocmh.checkRequired(message, "source", "target");
     String source = message.getStr("source");
@@ -76,23 +76,25 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
 
     for (ZkNodeProps sourceReplica : sourceReplicas) {
       NamedList nl = new NamedList();
-      log.info("going to create replica {}", Utils.toJSONString(sourceReplica));
+      log.info("Going to create replica for collection={} shard={} on node={}", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target);
       ZkNodeProps msg = sourceReplica.plus("parallel", String.valueOf(parallel)).plus(CoreAdminParams.NODE, target);
       final ZkNodeProps addedReplica = ocmh.addReplica(clusterState,
           msg, nl, () -> {
             countDownLatch.countDown();
             if (nl.get("failure") != null) {
-              log.warn("failed to create : " + Utils.toJSONString(msg));
+              String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
+                  " on node=%s", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target);
+              log.warn(errorString);
               // one replica creation failed. Make the best attempt to
               // delete all the replicas created so far in the target
               // and exit
               synchronized (results) {
-                results.add("failure", "Could not create copy of replica " + Utils.toJSONString(sourceReplica));
+                results.add("failure", errorString);
                 anyOneFailed.set(true);
               }
             } else {
-              log.info("successfully created : " + Utils.toJSONString(msg));
-
+              log.debug("Successfully created replica for collection={} shard={} on node={}",
+                  sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target);
             }
           });
 
@@ -101,12 +103,12 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
       }
     }
 
-    log.info("Waiting for creates to complete ");
+    log.debug("Waiting for replace node action to complete");
     countDownLatch.await(5, TimeUnit.MINUTES);
-    log.info("Waiting over for creates to complete ");
+    log.debug("Finished waiting for replace node action to complete");
 
     if (anyOneFailed.get()) {
-      log.info("failed to create some cores delete all " + Utils.toJSONString(createdReplicas));
+      log.info("Failed to create some replicas. Cleaning up all replicas on target node");
       CountDownLatch cleanupLatch = new CountDownLatch(createdReplicas.size());
       for (ZkNodeProps createdReplica : createdReplicas) {
         NamedList deleteResult = new NamedList();
@@ -115,29 +117,27 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
             cleanupLatch.countDown();
             if (deleteResult.get("failure") != null) {
               synchronized (results) {
-                results.add("failure", "could not cleanup, because  : " + deleteResult.get("failure") + "  " + Utils.toJSONString(createdReplica));
+                results.add("failure", "Could not cleanup, because of : " + deleteResult.get("failure"));
               }
             }
           });
         } catch (KeeperException e) {
           cleanupLatch.countDown();
-          log.info("Error deleting ", e);
+          log.warn("Error deleting replica ", e);
         } catch (Exception e) {
-          log.error("Unknown Error deleteing", e);
+          log.warn("Error deleting replica ", e);
           cleanupLatch.countDown();
           throw e;
         }
       }
       cleanupLatch.await(5, TimeUnit.MINUTES);
-      return null;
     }
 
 
     // we have reached this far means all replicas could be recreated
     //now cleanup the replicas in the source node
-    DeleteNodeCmd.cleanupReplicas(results, state, sourceReplicas, ocmh);
-    results.add("success", "REPLACENODE completed successfully from  : " + source + " to : " + target);
-    return null;
+    DeleteNodeCmd.cleanupReplicas(results, state, sourceReplicas, ocmh, source);
+    results.add("success", "REPLACENODE action completed successfully from  : " + source + " to : " + target);
   }
 
   static List<ZkNodeProps> getReplicasOfNode(String source, ClusterState state) {
@@ -152,7 +152,6 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
                 ZkStateReader.CORE_NAME_PROP, replica.getCoreName(),
                 ZkStateReader.REPLICA_PROP, replica.getName(),
                 CoreAdminParams.NODE, source);
-            log.info("src_core : {}", Utils.toJSONString(props));
             sourceReplicas.add(props
             );
           }