You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/07/22 14:40:20 UTC

[lucene-solr] branch reference_impl updated (4128687 -> c4789e8)

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a change to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from 4128687  @294 We always have to clear live node on exists.
     new 9976fe0  @295 Work on fresh state on restart after hard stop.
     new c4789e8  @296 Interrupt around log replay.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../java/org/apache/solr/cloud/RecoveryStrategy.java    |  5 ++++-
 .../src/java/org/apache/solr/cloud/ZkController.java    | 12 ++++++++++++
 .../org/apache/solr/update/DefaultSolrCoreState.java    |  2 +-
 .../java/org/apache/solr/common/cloud/SolrZkClient.java |  4 ++++
 .../apache/solr/common/cloud/ZkMaintenanceUtils.java    | 17 ++++++++++++++++-
 .../org/apache/solr/common/util/OrderedExecutor.java    |  8 +++++---
 6 files changed, 42 insertions(+), 6 deletions(-)


[lucene-solr] 01/02: @295 Work on fresh state on restart after hard stop.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 9976fe02307a9e78e3df75124ba049fe01f513ee
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Wed Jul 22 09:38:20 2020 -0500

    @295 Work on fresh state on restart after hard stop.
---
 .../src/java/org/apache/solr/cloud/ZkController.java    | 12 ++++++++++++
 .../java/org/apache/solr/common/cloud/SolrZkClient.java |  4 ++++
 .../apache/solr/common/cloud/ZkMaintenanceUtils.java    | 17 ++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 7cb7d15..02c30b8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -422,6 +422,7 @@ public class ZkController implements Closeable {
       public void command() {
 
         try (ParWork worker = new ParWork("disconnected", true)) {
+          worker.collect(overseerContexts);
           worker.collect( ZkController.this.overseer);
           worker.collect(() -> {
             clearZkCollectionTerms();
@@ -1427,6 +1428,17 @@ public class ZkController implements Closeable {
       } catch (KeeperException.NodeExistsException e) {
         log.warn("Found our ephemeral live node already exists. This must be a quick restart after a hard shutdown, removing existing live node {}", nodePath);
         zkClient.delete(nodePath, -1);
+
+        List<String> collections = zkClient.getChildren(COLLECTIONS_ZKNODE, null, false);
+        for (String collection : collections) {
+          log.warn("Cleaning up ephemerals for leadership for  {}", collection);
+          List<String> shards = zkClient.getChildren(COLLECTIONS_ZKNODE + "/" +collection + "/leaders", null, false);
+          for (String shard : shards) {
+            zkClient.clean(COLLECTIONS_ZKNODE + "/leaders/" + shard);
+            zkClient.cleanChildren(COLLECTIONS_ZKNODE + "/leader_elect/" + shard);
+          }
+        }
+
         zkClient.getSolrZooKeeper().create(nodePath, null, zkClient.getZkACLProvider().getACLsToAdd(nodePath), CreateMode.EPHEMERAL);
       }
     } catch (Exception e) {
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
index 058afa8..b63ad8d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
@@ -985,6 +985,10 @@ public class SolrZkClient implements Closeable {
     ZkMaintenanceUtils.clean(this, path);
   }
 
+  public void cleanChildren(String path) throws InterruptedException, KeeperException {
+    ZkMaintenanceUtils.cleanChildren(this, path);
+  }
+
   public void clean(String path, Predicate<String> nodeFilter) throws InterruptedException, KeeperException {
     log.info("clean path {}" + path);
     ZkMaintenanceUtils.clean(this, path, nodeFilter);
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkMaintenanceUtils.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkMaintenanceUtils.java
index 116f5bf..6838442 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkMaintenanceUtils.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkMaintenanceUtils.java
@@ -234,7 +234,6 @@ public class ZkMaintenanceUtils {
     manager.uploadConfigDir(confPath, confName);
   }
 
-  // yeah, it's recursive :(
   public static void clean(SolrZkClient zkClient, String path) throws InterruptedException, KeeperException {
     traverseZkTree(zkClient, path, VISIT_ORDER.VISIT_POST, znode -> {
       try {
@@ -251,6 +250,22 @@ public class ZkMaintenanceUtils {
     });
   }
 
+  public static void cleanChildren(SolrZkClient zkClient, String path) throws InterruptedException, KeeperException {
+    traverseZkTree(zkClient, path, VISIT_ORDER.VISIT_POST, znode -> {
+      try {
+        if (!znode.equals("/") && !znode.equals(path)) {
+          try {
+            zkClient.delete(znode, -1);
+          } catch (KeeperException.NotEmptyException e) {
+            clean(zkClient, znode);
+          }
+        }
+      } catch (KeeperException.NoNodeException r) {
+        return;
+      }
+    });
+  }
+
   /**
    * Delete a path and all of its sub nodes
    * @param filter for node to be deleted


[lucene-solr] 02/02: @296 Interrupt around log replay.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit c4789e8802a1e80ad0db73b11a14026e9130f5a0
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Wed Jul 22 09:39:30 2020 -0500

    @296 Interrupt around log replay.
---
 solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java    | 5 ++++-
 .../src/java/org/apache/solr/update/DefaultSolrCoreState.java     | 2 +-
 .../src/java/org/apache/solr/common/util/OrderedExecutor.java     | 8 +++++---
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index 29d96db..0c601f3 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import net.sf.saxon.trans.Err;
@@ -928,10 +929,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
       // wait for replay
       RecoveryInfo report;
       try {
-        report = future.get();
+        report = future.get(10, TimeUnit.MINUTES); // nocommit - how long? make configurable too
       } catch (InterruptedException e) {
         ParWork.propegateInterrupt(e);
         throw new InterruptedException();
+      } catch (TimeoutException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
       }
       if (report.failed) {
         SolrException.log(log, "Replay failed");
diff --git a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
index 7be3229..ef8e8da 100644
--- a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
+++ b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
@@ -398,7 +398,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements Recover
       }
       if (wait && recoveryStrat != null && recoveryFuture != null) {
         try {
-          recoveryFuture.get(10, TimeUnit.MINUTES);
+          recoveryFuture.get(10, TimeUnit.MINUTES); // nocommit - how long? make configurable too
         } catch (InterruptedException e) {
           ParWork.propegateInterrupt(e);
           throw new SolrException(ErrorCode.SERVER_ERROR, e);
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/OrderedExecutor.java b/solr/solrj/src/java/org/apache/solr/common/util/OrderedExecutor.java
index 2095146..7a863e5 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/OrderedExecutor.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/OrderedExecutor.java
@@ -24,6 +24,8 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.Semaphore;
 
+import org.apache.solr.common.ParWork;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.ExecutorUtil;
 
 public class OrderedExecutor implements Executor {
@@ -57,8 +59,8 @@ public class OrderedExecutor implements Executor {
     try {
       sparseStripedLock.add(lockId);
     } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      return;
+      ParWork.propegateInterrupt(e);
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
     }
 
     try {
@@ -91,7 +93,7 @@ public class OrderedExecutor implements Executor {
 
   /** A set of locks by a key {@code T}, kind of like Google Striped but the keys are sparse/lazy. */
   private static class SparseStripedLock<T> {
-    private ConcurrentHashMap<T, CountDownLatch> map = new ConcurrentHashMap<>();
+    private ConcurrentHashMap<T, CountDownLatch> map = new ConcurrentHashMap<>(32);
     private final Semaphore sizeSemaphore;
 
     SparseStripedLock(int maxSize) {