You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/09/01 22:51:43 UTC

[lucene-solr] branch reference_impl updated (d97d7f9 -> 7774c8b)

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a change to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from d97d7f9  @677 Another attempt at non problematic leak in CloudSolrClientCacheTest
     new d340c0a  @678 Work on debugging testRetryUpdatesWhenClusterStateIsStale.
     new a8ef749  @679 A couple more fixes around debugging testRetryUpdatesWhenClusterStateIsStale.
     new e6cafcc  @680 Change to debug logging.
     new 7d8ccf2  @681 The next Overseer needs a shot when you don't expect the exception.
     new 46a79d1  @682 If we can't write the state update due to version conflict, bail for new Overseer.
     new 1cf199c  Delete the ephemeral znode held by the leader to trigger the watcher on the next replica inline
     new 9ab178d  @683 Try to pin down rare updatelog leak.
     new 7774c8b  @684 Fix solrclient leak.

The 8 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/java/org/apache/solr/cloud/Overseer.java   |  7 ++--
 .../solr/cloud/ShardLeaderElectionContext.java     | 15 +++++++-
 .../solr/cloud/ShardLeaderElectionContextBase.java |  4 +-
 .../org/apache/solr/cloud/ZkCollectionTerms.java   |  1 -
 .../java/org/apache/solr/cloud/ZkController.java   | 30 ++++++++++-----
 .../apache/solr/cloud/overseer/ZkStateWriter.java  | 13 ++++---
 .../src/java/org/apache/solr/core/SolrCore.java    | 44 +++++++++++++---------
 .../handler/component/RealTimeGetComponent.java    |  2 +-
 .../org/apache/solr/update/PeerSyncWithLeader.java |  5 +++
 .../java/org/apache/solr/update/UpdateHandler.java | 26 ++++++++-----
 .../solrj/impl/CloudHttp2SolrClientTest.java       | 15 +++-----
 .../solrj/impl/CloudSolrClientCacheTest.java       |  1 +
 12 files changed, 103 insertions(+), 60 deletions(-)


[lucene-solr] 07/08: @683 Try to pin down rare updatelog leak.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 9ab178dadec056a6d57dbd6360f69ea7eee0532e
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 17:31:11 2020 -0500

    @683 Try to pin down rare updatelog leak.
---
 .../java/org/apache/solr/update/UpdateHandler.java | 26 ++++++++++++++--------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
index bed48c9..59a99bc 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
@@ -57,9 +57,10 @@ UpdateHandler implements SolrInfoBean, Closeable {
   protected Vector<SolrEventListener> softCommitCallbacks = new Vector<>();
   protected Vector<SolrEventListener> optimizeCallbacks = new Vector<>();
 
-  protected volatile UpdateLog ulog;
+  protected final UpdateLog ulog;
 
   protected SolrMetricsContext solrMetricsContext;
+  protected volatile boolean closed;
 
   private void parseEventListeners() {
     final Class<SolrEventListener> clazz = SolrEventListener.class;
@@ -96,6 +97,7 @@ UpdateHandler implements SolrInfoBean, Closeable {
 
   @Override
   public void close() throws IOException {
+    this.closed = true;
     if (ulog != null) ulog.close();
     ObjectReleaseTracker.release(this);
   }
@@ -123,6 +125,7 @@ UpdateHandler implements SolrInfoBean, Closeable {
   }
   
   public UpdateHandler(SolrCore core, UpdateLog updateLog)  {
+    UpdateLog ourUpdateLog = null;
     ObjectReleaseTracker.track(this);
     try {
       this.core = core;
@@ -136,29 +139,34 @@ UpdateHandler implements SolrInfoBean, Closeable {
       if (updateLog == null && ulogPluginInfo != null && ulogPluginInfo.isEnabled() && !skipUpdateLog) {
         DirectoryFactory dirFactory = core.getDirectoryFactory();
         if (dirFactory instanceof HdfsDirectoryFactory) {
-          ulog = new HdfsUpdateLog(((HdfsDirectoryFactory) dirFactory).getConfDir());
+          ourUpdateLog = new HdfsUpdateLog(((HdfsDirectoryFactory) dirFactory).getConfDir());
         } else {
           String className = ulogPluginInfo.className == null ? UpdateLog.class.getName() : ulogPluginInfo.className;
-          ulog = core.getResourceLoader().newInstance(className, UpdateLog.class, "update.");
+          ourUpdateLog = core.getResourceLoader().newInstance(className, UpdateLog.class, "update.");
         }
 
         if (!core.isReloaded() && !dirFactory.isPersistent()) {
-          ulog.clearLog(core, ulogPluginInfo);
+          ourUpdateLog.clearLog(core, ulogPluginInfo);
         }
 
         if (log.isInfoEnabled()) {
-          log.info("Using UpdateLog implementation: {}", ulog.getClass().getName());
+          log.info("Using UpdateLog implementation: {}", ourUpdateLog.getClass().getName());
+        }
+        ourUpdateLog.init(ulogPluginInfo);
+        ourUpdateLog.init(this, core);
+        if (updateLog != null) {
+          updateLog.close();
         }
-        ulog.init(ulogPluginInfo);
-        ulog.init(this, core);
       } else {
-        ulog = updateLog;
+        ourUpdateLog = updateLog;
       }
     } catch (Exception e) {
-      IOUtils.closeQuietly(ulog);
+      IOUtils.closeQuietly(ourUpdateLog);
       ObjectReleaseTracker.release(this);
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
     }
+
+    ulog = ourUpdateLog;
   }
 
   /**


[lucene-solr] 04/08: @681 The next Overseer needs a shot when you don't expect the exception.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 7d8ccf20ca9293aa59fa5734829881c7098449ba
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 16:59:52 2020 -0500

    @681 The next Overseer needs a shot when you don't expect the exception.
---
 solr/core/src/java/org/apache/solr/cloud/Overseer.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index 41de82b..0de28c8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -287,9 +287,10 @@ public class Overseer implements SolrCloseable {
               return;
             } catch (Exception e) {
               log.error("Unexpected error in Overseer state update loop", e);
-              if (!isClosed()) {
-                continue;
-              }
+              return;
+//              if (!isClosed()) {
+//                continue;
+//              }
             }
           }
 


[lucene-solr] 03/08: @680 Change to debug logging.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit e6cafcc238e2339a2fb6d071952778724b3d2c6a
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 16:06:28 2020 -0500

    @680 Change to debug logging.
---
 solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
index b60f706..7dc7dec 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
@@ -328,7 +328,7 @@ public class ZkStateWriter {
             byte[] data = Utils.toJSON(singletonMap(c.getName(), c));
 
             //if (log.isDebugEnabled()) {
-            log.info("Write state.json prevVersion={} bytes={} cs={}", c.getZNodeVersion(), data.length, c);
+            if (log.isDebugEnabled()) log.debug("Write state.json prevVersion={} bytes={} cs={}", c.getZNodeVersion(), data.length, c);
             //}
             // stat = reader.getZkClient().getCurator().setData().withVersion(prevVersion).forPath(path, data);
             try {


[lucene-solr] 05/08: @682 If we can't write the state update due to version conflict, bail for new Overseer.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 46a79d1f14a222915040d1668d8644e86ff5130c
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 17:12:22 2020 -0500

    @682 If we can't write the state update due to version conflict, bail for new Overseer.
---
 .../java/org/apache/solr/cloud/overseer/ZkStateWriter.java    | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
index 7dc7dec..cf99221 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
@@ -254,8 +254,8 @@ public class ZkStateWriter {
             log.info(
                 "Tried to update the cluster state using version={} but we where rejected, currently at {}",
                 prevVersion, c == null ? "null" : c.getZNodeVersion(), e);
-            prevState = reader.getClusterState();
-            continue;
+
+            throw e;
           }
           ParWork.propegateInterrupt(e);
           throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
@@ -339,9 +339,10 @@ public class ZkStateWriter {
               log.warn(
                   "Tried to update the cluster state using version={} but we where rejected, found {}",
                   c.getZNodeVersion(), stat.getVersion(), bve);
-              lastUpdatedTime = -1;
-              failedUpdates.put(name, c);
-              continue;
+              throw bve;
+           //   lastUpdatedTime = -1;
+//              failedUpdates.put(name, c);
+//              continue;
             }
           } else {
 


[lucene-solr] 01/08: @678 Work on debugging testRetryUpdatesWhenClusterStateIsStale.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit d340c0abfa4ab27395fbe31efd62e4191ad07e82
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 15:23:42 2020 -0500

    @678 Work on debugging testRetryUpdatesWhenClusterStateIsStale.
---
 .../solr/cloud/ShardLeaderElectionContext.java     | 15 +++++++-
 .../solr/cloud/ShardLeaderElectionContextBase.java |  4 +-
 .../java/org/apache/solr/cloud/ZkController.java   | 20 +++++++---
 .../src/java/org/apache/solr/core/SolrCore.java    | 44 +++++++++++++---------
 .../solrj/impl/CloudHttp2SolrClientTest.java       | 15 +++-----
 .../solrj/impl/CloudSolrClientCacheTest.java       |  1 +
 6 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
index 2f564f9..f195e8c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
@@ -270,12 +270,15 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
           }
         }
         if (!success) {
+          if (isClosed()) {
+            return;
+          }
           rejoinLeaderElection(core);
           return;
         }
 
       }
-      if (!isClosed) {
+      if (!isClosed()) {
         try {
           if (replicaType == Replica.Type.TLOG) {
             // stop replicate from old leader
@@ -298,6 +301,10 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
                     "without being up-to-date with the previous leader", coreNodeName);
             zkController.getShardTerms(collection, shardId).setTermEqualsToLeader(coreNodeName);
           }
+          if (isClosed()) {
+            return;
+          }
+
           super.runLeaderProcess(context, weAreReplacement, 0);
 
           assert shardId != null;
@@ -316,6 +323,9 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 
           try (SolrCore core = cc.getCore(coreName)) {
             if (core != null) {
+              if (isClosed()) {
+                return;
+              }
               core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
               publishActiveIfRegisteredAndNotActive(core);
             } else {
@@ -347,6 +357,9 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 
               // we could not publish ourselves as leader - try and rejoin election
               try {
+                if (isClosed()) {
+                  return;
+                }
                 rejoinLeaderElection(core);
               } catch (Exception exc) {
                 ParWork.propegateInterrupt(e);
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
index 57a888a..c8138f8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
@@ -83,7 +83,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
           log.debug("Removing leader registration node on cancel: {} {}", leaderPath, version);
           List<Op> ops = new ArrayList<>(2);
           ops.add(Op.check(Paths.get(leaderPath).getParent().toString(), version));
-          ops.add(Op.check(electionPath, -1));
+          ops.add(Op.check(leaderSeqPath, -1));
           ops.add(Op.delete(leaderPath, -1));
           zkClient.multi(ops);
         } catch (KeeperException e) {
@@ -110,8 +110,6 @@ class ShardLeaderElectionContextBase extends ElectionContext {
           return;
         } catch (Exception e) {
           throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election", e);
-        } finally {
-          version = null;
         }
       } else {
         log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c8480c5..67d45b1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -149,6 +149,7 @@ public class ZkController implements Closeable {
   public static final String CLUSTER_SHUTDOWN = "/cluster/shutdown";
 
   static final int WAIT_DOWN_STATES_TIMEOUT_SECONDS = 60;
+  public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
   public final int WAIT_FOR_STATE = Integer.getInteger("solr.waitForState", 10);
 
   private final boolean SKIP_AUTO_RECOVERY = Boolean.getBoolean("solrcloud.skip.autorecovery");
@@ -384,7 +385,16 @@ public class ZkController implements Closeable {
     ContextKey contextKey = new ContextKey(collection, coreNodeName);
     ElectionContext context = electionContexts.get(contextKey);
     if (context != null) {
-      context.close();
+      try {
+        context.cancelElection();
+      } catch (InterruptedException e) {
+        ParWork.propegateInterrupt(e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      } catch (KeeperException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      } finally {
+        context.close();
+      }
     }
   }
 
@@ -403,12 +413,12 @@ public class ZkController implements Closeable {
       public synchronized void command() {
 
         try (ParWork worker = new ParWork("disconnected", true)) {
-          worker.collect(overseerContexts);
-          worker.collect( ZkController.this.overseer);
+          worker.collect("OverseerElectionContexts", overseerContexts.values());
+          worker.collect("Overseer", ZkController.this.overseer);
           worker.collect("", () -> {
             clearZkCollectionTerms();
           });
-          worker.collect(electionContexts.values());
+          worker.collect("electionContexts", electionContexts.values());
           worker.collect("",() -> {
             markAllAsNotLeader(descriptorsSupplier);
           });
@@ -2317,7 +2327,7 @@ public class ZkController implements Closeable {
    */
   public boolean claimAsyncId(String asyncId) throws KeeperException {
     try {
-      return asyncIdsMap.putIfAbsent(asyncId, new byte[0]);
+      return asyncIdsMap.putIfAbsent(asyncId, EMPTY_BYTE_ARRAY);
     } catch (InterruptedException e) {
       ParWork.propegateInterrupt(e);
       throw new RuntimeException(e);
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index 6e3fa0a..8978bca 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -3069,24 +3069,7 @@ public final class SolrCore implements SolrInfoBean, Closeable {
       }
     }
     if (deleteInstanceDir) {
-      addCloseHook(new CloseHook() {
-        @Override
-        public void preClose(SolrCore core) {
-          // empty block
-        }
-
-        @Override
-        public void postClose(SolrCore core) {
-          if (desc != null) {
-            try {
-              FileUtils.deleteDirectory(desc.getInstanceDir().toFile());
-            } catch (IOException e) {
-              SolrException.log(log, "Failed to delete instance dir for core:"
-                  + core.getName() + " dir:" + desc.getInstanceDir());
-            }
-          }
-        }
-      });
+      addCloseHook(new SolrCoreDeleteCloseHook(desc));
     }
   }
 
@@ -3332,4 +3315,29 @@ public final class SolrCore implements SolrInfoBean, Closeable {
   public void runAsync(Runnable r) {
     ParWork.getMyPerThreadExecutor().submit(r);
   }
+
+  private static class SolrCoreDeleteCloseHook extends CloseHook {
+    private final CoreDescriptor desc;
+
+    public SolrCoreDeleteCloseHook(CoreDescriptor desc) {
+      this.desc = desc;
+    }
+
+    @Override
+    public void preClose(SolrCore core) {
+      // empty block
+    }
+
+    @Override
+    public void postClose(SolrCore core) {
+      if (desc != null) {
+        try {
+          FileUtils.deleteDirectory(desc.getInstanceDir().toFile());
+        } catch (IOException e) {
+          SolrException.log(log, "Failed to delete instance dir for core:"
+              + core.getName() + " dir:" + desc.getInstanceDir());
+        }
+      }
+    }
+  }
 }
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java
index a34e2c0..3ec2dea 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java
@@ -436,7 +436,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", liveNodes, liveNodes)
         .setMaxShardsPerNode(liveNodes * liveNodes)
         .process(cluster.getSolrClient());
-    cluster.waitForActiveCollection(collectionName, liveNodes, liveNodes * liveNodes);
     // Add some new documents
     new UpdateRequest()
         .add(id, "0", "a_t", "hello1")
@@ -517,7 +516,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, liveNodes/3, liveNodes/3, liveNodes/3)
         .setMaxShardsPerNode(liveNodes)
         .process(cluster.getSolrClient());
-    cluster.waitForActiveCollection(collectionName, 1, liveNodes);
 
     // Add some new documents
     new UpdateRequest()
@@ -611,7 +609,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
     try (CloudSolrClient client = SolrTestCaseJ4.getCloudSolrClient(cluster.getZkServer().getZkAddress())) {
       // important to have one replica on each node
       CollectionAdminRequest.createCollection("foo", "conf", 1, NODE_COUNT).process(client);
-      cluster.waitForActiveCollection("foo", 1, NODE_COUNT);
       client.setDefaultCollection("foo");
 
       Map<String, String> adminPathToMbean = new HashMap<>(CommonParams.ADMIN_PATHS.size());
@@ -666,8 +663,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
 
       CollectionAdminRequest.waitForAsyncRequest(async1, client, TIMEOUT);
       CollectionAdminRequest.waitForAsyncRequest(async2, client, TIMEOUT);
-      cluster.waitForActiveCollection("multicollection1", 2, 2);
-      cluster.waitForActiveCollection("multicollection2", 2, 2);
       client.setDefaultCollection("multicollection1");
 
       List<SolrInputDocument> docs = new ArrayList<>(3);
@@ -812,7 +807,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
   @Ignore // nocommit ~ possible regression? response doesn't contain "adds"?
   public void testVersionsAreReturned() throws Exception {
     CollectionAdminRequest.createCollection("versions_collection", "conf", 2, 1).process(cluster.getSolrClient());
-    cluster.waitForActiveCollection("versions_collection", 2, 2);
     
     // assert that "adds" are returned
     UpdateRequest updateRequest = new UpdateRequest()
@@ -898,7 +892,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
                  CollectionAdminRequest.createCollection(COL, "conf", 1, 1)
                  .setCreateNodeSet(old_leader_node.getNodeName())
                  .process(cluster.getSolrClient()).getStatus());
-    cluster.waitForActiveCollection(COL, 1, 1);
 
     // determine the coreNodeName of only current replica
     Collection<Slice> slices = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COL).getSlices();
@@ -926,6 +919,8 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
                    .setNode(new_leader_node.getNodeName())
                    // NOTE: don't use our stale_client for this -- don't tip it off of a collection change
                    .process(cluster.getSolrClient()).getStatus());
+
+      cluster.waitForActiveCollection(COL, 1, 2);
       
       // ...and delete our original leader.
       assertEquals("Couldn't create collection", 0,
@@ -933,7 +928,10 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
                    // NOTE: don't use our stale_client for this -- don't tip it off of a collection change
                    .process(cluster.getSolrClient()).getStatus());
 
-      // stale_client's collection state cache should now only point at a leader that no longer exists.
+//      Thread.currentThread().sleep(3000);
+//
+//      cluster.getZkClient().printLayout();
+      cluster.waitForActiveCollection(COL, 1, 1);
       
       // attempt a (direct) update that should succeed in spite of cached cluster state
       // pointing solely to a node that's no longer part of our collection...
@@ -974,7 +972,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", liveNodes, 1, 1, pullReplicas)
         .setMaxShardsPerNode(liveNodes)
         .process(cluster.getSolrClient());
-    cluster.waitForActiveCollection(collectionName, liveNodes, liveNodes * (2 + pullReplicas));
     
     // Add some new documents
     new UpdateRequest()
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
index 08a535b..b647905 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
@@ -64,6 +64,7 @@ public class CloudSolrClientCacheTest extends SolrTestCaseJ4 {
         this.c = c;
       }
 
+
       @Override
       public boolean isLazilyLoaded() {
         return true;


[lucene-solr] 02/08: @679 A couple more fixes around debugging testRetryUpdatesWhenClusterStateIsStale.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit a8ef7497fadb54ac3af466c796b61dd5d94bfe13
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 15:32:02 2020 -0500

    @679 A couple more fixes around debugging testRetryUpdatesWhenClusterStateIsStale.
---
 .../core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java |  1 -
 solr/core/src/java/org/apache/solr/cloud/ZkController.java     | 10 ++++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
index 8641b74..25d2b27 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
@@ -59,7 +59,6 @@ class ZkCollectionTerms implements AutoCloseable {
 
   public void remove(String shardId, CoreDescriptor coreDescriptor) {
     synchronized (terms) {
-      if (closed) throw new AlreadyClosedException();
       if (getShard(shardId).removeTerm(coreDescriptor)) {
         terms.remove(shardId).close();
       }
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 67d45b1..dd3e15c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1947,10 +1947,12 @@ public class ZkController implements Closeable {
   public void unregister(String coreName, CoreDescriptor cd, boolean removeCoreFromZk) throws Exception {
     final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
     final String collection = cd.getCloudDescriptor().getCollectionName();
-    ZkCollectionTerms ct = collectionToTerms.get(collection);
-    if (ct != null) {
-      ct.close();
-      ct.remove(cd.getCloudDescriptor().getShardId(), cd);
+    synchronized (collectionToTerms) {
+      ZkCollectionTerms ct = collectionToTerms.get(collection);
+      if (ct != null) {
+        ct.close();
+        ct.remove(cd.getCloudDescriptor().getShardId(), cd);
+      }
     }
     replicasMetTragicEvent.remove(collection+":"+coreNodeName);
 


[lucene-solr] 06/08: Delete the ephemeral znode held by the leader to trigger the watcher on the next replica inline

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 1cf199c9d50d02a42efbbc2ab089d8133929edba
Author: Timothy Potter <th...@gmail.com>
AuthorDate: Tue Sep 1 16:38:58 2020 -0600

    Delete the ephemeral znode held by the leader to trigger the watcher on the next replica inline
---
 .../src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
index c8138f8..260607a 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
@@ -83,7 +83,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
           log.debug("Removing leader registration node on cancel: {} {}", leaderPath, version);
           List<Op> ops = new ArrayList<>(2);
           ops.add(Op.check(Paths.get(leaderPath).getParent().toString(), version));
-          ops.add(Op.check(leaderSeqPath, -1));
+          ops.add(Op.delete(leaderSeqPath, -1));
           ops.add(Op.delete(leaderPath, -1));
           zkClient.multi(ops);
         } catch (KeeperException e) {


[lucene-solr] 08/08: @684 Fix solrclient leak.

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 7774c8bcd941481033c70ae2133d65eab8f0a46a
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 17:40:59 2020 -0500

    @684 Fix solrclient leak.
---
 .../java/org/apache/solr/handler/component/RealTimeGetComponent.java | 2 +-
 solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java    | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 8af6a09..0e251e4 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -1126,7 +1126,7 @@ public class RealTimeGetComponent extends SearchComponent
     try (PeerSyncWithLeader peerSync = new PeerSyncWithLeader(rb.req.getCore(), syncWithLeader, nVersions)) {
       boolean success = peerSync.sync(versions).isSuccess();
       rb.rsp.add("syncWithLeader", success);
-    } catch (IOException e) {
+    } catch (Exception e) {
       log.error("Error while closing", e);
     }
   }
diff --git a/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java b/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java
index 70573612..e849e49 100644
--- a/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java
+++ b/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java
@@ -33,6 +33,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.IOUtils;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrInfoBean;
@@ -113,6 +114,10 @@ public class PeerSyncWithLeader implements SolrMetricProducer {
     return "PeerSync: core="+uhandler.core.getName()+ " url="+myURL +" ";
   }
 
+  public void close() {
+    IOUtils.closeQuietly(clientToLeader);
+  }
+
   /**
    * Sync with leader
    * @param startingVersions : recent versions on startup