You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dr...@apache.org on 2016/06/07 22:02:58 UTC

lucene-solr:SOLR-9191: SOLR-9191: monitor exception fix

Repository: lucene-solr
Updated Branches:
  refs/heads/SOLR-9191 34e308604 -> 23c0edb15


SOLR-9191: monitor exception fix


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/23c0edb1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/23c0edb1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/23c0edb1

Branch: refs/heads/SOLR-9191
Commit: 23c0edb154630259209ecd7f54109cb4d28ca861
Parents: 34e3086
Author: Scott Blum <dr...@apache.org>
Authored: Tue Jun 7 17:58:34 2016 -0400
Committer: Scott Blum <dr...@apache.org>
Committed: Tue Jun 7 18:02:45 2016 -0400

----------------------------------------------------------------------
 .../org/apache/solr/cloud/DistributedQueue.java | 66 +++++++++++---------
 1 file changed, 37 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23c0edb1/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java
index b430ae5..5d4fd87 100644
--- a/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java
+++ b/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java
@@ -19,7 +19,6 @@ package org.apache.solr.cloud;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.TreeSet;
@@ -318,49 +317,58 @@ public class DistributedQueue {
   /**
    * Return the currently-known set of elements, using child names from memory. If no children are found, or no
    * children pass {@code acceptFilter}, waits up to {@code waitMillis} for at least one child to become available.
-   * <p>
+   * <p/>
    * Package-private to support {@link OverseerTaskQueue} specifically.
    */
-  Collection<Pair<String, byte[]>> peekElements(int n, long waitMillis, Function<String, Boolean> acceptFilter) throws KeeperException, InterruptedException {
-    List<Pair<String, byte[]>> result = new ArrayList<>();
+  Collection<Pair<String, byte[]>> peekElements(int max, long waitMillis, Function<String, Boolean> acceptFilter) throws KeeperException, InterruptedException {
+    List<String> foundChildren = new ArrayList<>();
     long waitNanos = TimeUnit.MILLISECONDS.toNanos(waitMillis);
     while (waitNanos > 0) {
       // Trigger a fetch if needed.
-      firstElement();
+      firstChild(false);
 
-      TreeSet<String> childCopy;
       updateLock.lockInterruptibly();
       try {
-        childCopy = new TreeSet<>(knownChildren);
+        for (String child : knownChildren) {
+          if (acceptFilter.apply(child)) {
+            foundChildren.add(child);
+          }
+        }
+        if (!foundChildren.isEmpty()) {
+          break;
+        }
+        waitNanos = changed.awaitNanos(waitNanos);
       } finally {
         updateLock.unlock();
       }
 
-      for (String child : childCopy) {
-        if (acceptFilter.apply(child)) {
-          try {
-            byte[] data = zookeeper.getData(dir + "/" + child, null, null, true);
-            result.add(new Pair<>(child, data));
-            if (result.size() >= n) {
-              return result;
-            }
-          } catch (KeeperException.NoNodeException e) {
-            // Another client deleted the node first, remove the in-memory and retry.
-            updateLock.lockInterruptibly();
-            try {
-              knownChildren.remove(child);
-            } finally {
-              updateLock.unlock();
-            }
-          }
-        }
+      if (!foundChildren.isEmpty()) {
+        break;
       }
-      if (!result.isEmpty()) {
-        return result;
+    }
+
+    // Technically we could restart the method if we fail to actually obtain any valid children
+    // from ZK, but this is a super rare case, and the latency of the ZK fetches would require
+    // much more sophisticated waitNanos tracking.
+    List<Pair<String, byte[]>> result = new ArrayList<>();
+    for (String child : foundChildren) {
+      try {
+        byte[] data = zookeeper.getData(dir + "/" + child, null, null, true);
+        result.add(new Pair<>(child, data));
+        if (result.size() >= max) {
+          break;
+        }
+      } catch (KeeperException.NoNodeException e) {
+        // Another client deleted the node first; remove it from the in-memory known children and continue.
+        updateLock.lockInterruptibly();
+        try {
+          knownChildren.remove(child);
+        } finally {
+          updateLock.unlock();
+        }
       }
-      waitNanos = changed.awaitNanos(waitNanos);
     }
-    return Collections.emptyList();
+    return result;
   }
 
   /**