You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/09/07 13:41:56 UTC

[lucene-solr] 02/02: @774 Enable some more tests.

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 4af70d54ef2586c9657fe153af26662077f8fd3b
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Mon Sep 7 08:41:31 2020 -0500

    @774 Enable some more tests.
---
 .../apache/solr/cloud/OverseerElectionContext.java |   6 +-
 .../solr/cloud/ShardLeaderElectionContext.java     |  16 +-
 .../solr/cloud/ShardLeaderElectionContextBase.java | 202 +++++++++++++--------
 .../java/org/apache/solr/cloud/ZkController.java   |   9 +-
 .../processor/DistributedZkUpdateProcessor.java    |  84 ++++-----
 .../solr/cloud/MissingSegmentRecoveryTest.java     |   1 -
 .../test/org/apache/solr/cloud/RecoveryZkTest.java |   1 -
 .../solr/cloud/TestLeaderElectionZkExpiry.java     |   2 +-
 .../solr/cloud/TestQueryingOnDownCollection.java   |   1 -
 .../cloud/TestTolerantUpdateProcessorCloud.java    |  13 +-
 .../org/apache/solr/cloud/ZkControllerTest.java    |  16 +-
 .../org/apache/solr/cloud/ZkShardTermsTest.java    |   9 +-
 12 files changed, 187 insertions(+), 173 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
index 9bcb731..9ce1657 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
@@ -48,7 +48,7 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
   @Override
   void runLeaderProcess(ElectionContext context, boolean weAreReplacement, int pauseBeforeStartMs) throws KeeperException,
           InterruptedException, IOException {
-    if (isClosed() || !zkClient.isConnected()) {
+    if (isClosed() || !zkClient.isConnected() || overseer.isDone()) {
       return;
     }
 
@@ -149,7 +149,7 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
           super.close();
         } catch (Exception e) {
           ParWork.propegateInterrupt(e);
-          log.error("Exception canceling election", e);
+          log.error("Exception closing election", e);
         }
       });
       closer.collect("Overseer", () -> {
@@ -157,7 +157,7 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
           cancelElection(fromCSUpdateThread);
         } catch (Exception e) {
           ParWork.propegateInterrupt(e);
-          log.error("Exception closing Overseer", e);
+          log.error("Exception canceling election", e);
         }
       });
     }
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
index f195e8c..3f5587e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
@@ -36,6 +36,7 @@ import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.IOUtils;
 import org.apache.solr.common.util.ObjectReleaseTracker;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.CoreContainer;
@@ -85,19 +86,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 
   @Override
   public void close() {
-    try (ParWork closer = new ParWork(this, true)) {
-      closer.collect("superClose",() -> super.close());
-      closer.collect("cancelElection",() -> {
-        try {
-          cancelElection();
-        } catch (Exception e) {
-          ParWork.propegateInterrupt(e);
-          log.error("Exception canceling election", e);
-        }
-      });
-      closer.collect(syncStrategy);
-    }
-
+    super.close();
+    IOUtils.closeQuietly(syncStrategy);
     this.isClosed = true;
     ObjectReleaseTracker.release(this);
   }
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
index 260607a..ef93244 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
@@ -23,6 +23,8 @@ import java.nio.file.Paths;
 import java.util.Iterator;
 import java.util.List;
 import java.util.ArrayList;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.ParWork;
@@ -48,6 +50,8 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   private volatile boolean closed;
   private volatile Integer leaderZkNodeParentVersion;
 
+  private static Set<String> locks = ConcurrentHashMap.newKeySet();
+
   public ShardLeaderElectionContextBase(final String coreNodeName, String electionPath, String leaderPath,
                                         ZkNodeProps props, SolrZkClient zkClient) {
     super(coreNodeName, electionPath, leaderPath, props);
@@ -72,54 +76,69 @@ class ShardLeaderElectionContextBase extends ElectionContext {
       return;
     }
     super.cancelElection();
-    try {
-      Integer version = leaderZkNodeParentVersion;
-      if (version != null) {
-        try {
-          // We need to be careful and make sure we *only* delete our own leader registration node.
-          // We do this by using a multi and ensuring the parent znode of the leader registration node
-          // matches the version we expect - there is a setData call that increments the parent's znode
-          // version whenever a leader registers.
-          log.debug("Removing leader registration node on cancel: {} {}", leaderPath, version);
-          List<Op> ops = new ArrayList<>(2);
-          ops.add(Op.check(Paths.get(leaderPath).getParent().toString(), version));
-          ops.add(Op.delete(leaderSeqPath, -1));
-          ops.add(Op.delete(leaderPath, -1));
-          zkClient.multi(ops);
-        } catch (KeeperException e) {
-          if (e instanceof NoNodeException) {
-            // okay
-            return;
-          }
-          if (e instanceof KeeperException.SessionExpiredException) {
-            log.warn("ZooKeeper session expired");
-            throw e;
-          }
+    getLock(leaderPath);
+      try {
+        if (leaderZkNodeParentVersion != null) {
+          try {
+            if (!zkClient.exists(leaderSeqPath)) {
+              return;
+            }
+            // We need to be careful and make sure we *only* delete our own leader registration node.
+            // We do this by using a multi and ensuring the parent znode of the leader registration node
+            // matches the version we expect - there is a setData call that increments the parent's znode
+            // version whenever a leader registers.
+            log.info("Removing leader registration node on cancel, parent node: {} {}", Paths.get(leaderPath).getParent().toString(), leaderZkNodeParentVersion);
+            List<Op> ops = new ArrayList<>(3);
+            ops.add(Op.check(Paths.get(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
+            ops.add(Op.delete(leaderSeqPath, -1));
+            ops.add(Op.delete(leaderPath, -1));
+            zkClient.multi(ops);
+          } catch (KeeperException e) {
+            if (e instanceof NoNodeException) {
+              // okay
+              return;
+            }
+            if (e instanceof KeeperException.SessionExpiredException) {
+              log.warn("ZooKeeper session expired");
+              throw e;
+            }
 
-          List<OpResult> results = e.getResults();
-          for (OpResult result : results) {
-            if (((OpResult.ErrorResult) result).getErr() == -101) {
-              // no node, fine
-            } else {
-              throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election", e);
+            int i = 0;
+            List<OpResult> results = e.getResults();
+            for (OpResult result : results) {
+              if (((OpResult.ErrorResult) result).getErr() == -101) {
+                // no node, fine
+              } else {
+                if (result instanceof OpResult.ErrorResult) {
+                  OpResult.ErrorResult dresult = (OpResult.ErrorResult) result;
+                  if (dresult.getErr() != 0) {
+                    log.error("op=" + i++ + " err=" + dresult.getErr());
+                  }
+                }
+                throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election " + e.getPath(), e);
+              }
             }
-          }
 
-        } catch (InterruptedException | AlreadyClosedException e) {
-          ParWork.propegateInterrupt(e, true);
-          return;
-        } catch (Exception e) {
-          throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election", e);
+          } catch (InterruptedException | AlreadyClosedException e) {
+            ParWork.propegateInterrupt(e, true);
+            return;
+          } catch (Exception e) {
+            throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election", e);
+          }
+        } else {
+          log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
         }
-      } else {
-        log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
-      }
-    } catch (Exception e) {
-      if (e instanceof  InterruptedException) {
-        ParWork.propegateInterrupt(e);
+      } catch (Exception e) {
+        if (e instanceof InterruptedException) {
+          ParWork.propegateInterrupt(e);
+        }
+        Stat stat = new Stat();
+        zkClient.getData(Paths.get(leaderPath).getParent().toString(), null, stat);
+        log.error("Exception trying to cancel election {} {} {}", stat.getVersion(), e.getClass().getName(), e.getMessage(), e);
+      } finally {
+        releaseLock(leaderPath);
       }
-      log.error("Exception trying to cancel election {} {}", e.getClass().getName(), e.getMessage());
-    }
+
   }
 
   @Override
@@ -127,51 +146,76 @@ class ShardLeaderElectionContextBase extends ElectionContext {
           throws KeeperException, InterruptedException, IOException {
     // register as leader - if an ephemeral is already there, wait to see if it goes away
 
-    String parent = Paths.get(leaderPath).getParent().toString();
-    List<String> errors = new ArrayList<>();
+    getLock(leaderPath);
     try {
-      if (isClosed()) {
-        log.info("Bailing on becoming leader, we are closed");
-        return;
-      }
-      log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
-      List<Op> ops = new ArrayList<>(3);
-
-      // We use a multi operation to get the parent nodes version, which will
-      // be used to make sure we only remove our own leader registration node.
-      // The setData call used to get the parent version is also the trigger to
-      // increment the version. We also do a sanity check that our leaderSeqPath exists.
-
-      ops.add(Op.check(leaderSeqPath, -1));
-      ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
-      ops.add(Op.setData(parent, null, -1));
-      List<OpResult> results;
-
-      results = zkClient.multi(ops);
-      log.info("Results from call {}", results);
-      Iterator<Op> it = ops.iterator();
-      for (OpResult result : results) {
-        if (result.getType() == ZooDefs.OpCode.setData) {
-          SetDataResult dresult = (SetDataResult) result;
-          Stat stat = dresult.getStat();
-          leaderZkNodeParentVersion = stat.getVersion();
-          log.info("Got leaderZkNodeParentVersion {}", leaderZkNodeParentVersion);
+
+      String parent = Paths.get(leaderPath).getParent().toString();
+      List<String> errors = new ArrayList<>();
+      try {
+        if (isClosed()) {
+          log.info("Bailing on becoming leader, we are closed");
+          return;
+        }
+        log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
+        List<Op> ops = new ArrayList<>(3);
+
+        // We use a multi operation to get the parent nodes version, which will
+        // be used to make sure we only remove our own leader registration node.
+        // The setData call used to get the parent version is also the trigger to
+        // increment the version. We also do a sanity check that our leaderSeqPath exists.
+
+        ops.add(Op.check(leaderSeqPath, -1));
+        ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
+        ops.add(Op.setData(parent, null, -1));
+        List<OpResult> results;
+
+        results = zkClient.multi(ops);
+        log.info("Results from call {}", results);
+        Iterator<Op> it = ops.iterator();
+        for (OpResult result : results) {
+          if (result.getType() == ZooDefs.OpCode.setData) {
+            SetDataResult dresult = (SetDataResult) result;
+            Stat stat = dresult.getStat();
+            leaderZkNodeParentVersion = stat.getVersion();
+            log.info("Got leaderZkNodeParentVersion {}", leaderZkNodeParentVersion);
+          }
         }
+        // assert leaderZkNodeParentVersion != null;
+
+      } catch (Throwable t) {
+        ParWork.propegateInterrupt(t);
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Could not register as the leader because creating the ephemeral registration node in ZooKeeper failed: " + errors, t);
       }
-    // assert leaderZkNodeParentVersion != null;
+    } finally {
+      releaseLock(leaderPath);
+    }
+  }
 
-    } catch (Throwable t) {
-      ParWork.propegateInterrupt(t);
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Could not register as the leader because creating the ephemeral registration node in ZooKeeper failed: " + errors, t);
+  // TODO: this is a bit hackey to start - we don't always use the same elector, so this is to protect against mistakes,
+  // but it could be more efficient and use wait/notify
+  private void getLock(String leaderPath) {
+    while (locks.contains(leaderPath)) {
+      try {
+        Thread.sleep(50);
+      } catch (InterruptedException e) {
+        ParWork.propegateInterrupt(e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    if (!locks.add(leaderPath)) {
+      getLock(leaderPath);
+    } else {
+      return;
     }
   }
 
+  private void releaseLock(String leaderPath) {
+    locks.remove(leaderPath);
+  }
+
   @Override
   public boolean isClosed() {
     return closed || zkClient.isClosed();
   }
-
-  Integer getLeaderZkNodeParentVersion() {
-    return leaderZkNodeParentVersion;
-  }
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 0f68a39..64fd3d1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -614,7 +614,9 @@ public class ZkController implements Closeable {
     this.shudownCalled = true;
 
     this.isClosed = true;
-
+    if (overseer != null) {
+      overseer.closeAndDone();
+    }
     try (ParWork closer = new ParWork(this, true)) {
       closer.collect(electionContexts.values());
       closer.collect(collectionToTerms.values());
@@ -623,11 +625,6 @@ public class ZkController implements Closeable {
       closer.collect(cloudSolrClient);
       closer.collect(replicateFromLeaders.values());
       closer.collect(overseerContexts.values());
-      closer.collect("Overseer", () -> {
-        if (overseer != null) {
-          overseer.closeAndDone();
-        }
-      });
       closer.addCollect();
       closer.collect(zkStateReader);
       closer.addCollect();
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
index cc5d616..25429f4 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
@@ -192,11 +192,6 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
         nodes = getCollectionUrls(collection,
             EnumSet.of(Replica.Type.TLOG, Replica.Type.NRT), true);
 
-        if (nodes != null) {
-          nodes.removeIf((node) -> node.getNodeProps().getCoreNodeName().equals(
-              cmd.getReq().getCore().getCoreDescriptor().getCloudDescriptor().getCoreNodeName()));
-        }
-
         try {
           leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, cloudDesc.getShardId());
         } catch (InterruptedException e) {
@@ -207,6 +202,7 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
         }
         isLeader = leaderReplica.getName().equals(cloudDesc.getCoreNodeName());
 
+        ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams()));
         if (nodes == null) {
           // This could happen if there are only pull replicas
           throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
@@ -216,6 +212,7 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
         log.info(
             "processCommit - distrib commit isLeader={} commit_end_point={} replicaType={}",
             isLeader, req.getParams().get(COMMIT_END_POINT), replicaType);
+
         if (req.getParams().get(COMMIT_END_POINT, "").equals("replicas")) {
           if (replicaType == Replica.Type.PULL) {
             log.warn("processCommit - Commit not supported on replicas of type "
@@ -225,9 +222,41 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
                 "processCommit - Do a local commit on NRT endpoint for replica");
             doLocalCommit(cmd);
           }
+        } else if (req.getParams().get(COMMIT_END_POINT, "").equals("leaders")) {
+
+          params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString());
+
+          params.set(COMMIT_END_POINT, "replicas");
+
+          List<SolrCmdDistributor.Node> useNodes = getReplicaNodesForLeader(cloudDesc.getShardId(), leaderReplica);
+
+          log.info(
+              "processCommit - Found the following replicas to send commit to {}",
+              useNodes);
+
+          worker.collect("localCommit", () -> {
+            try {
+              doLocalCommit(cmd);
+            } catch (IOException e) {
+              throw new SolrException(ErrorCode.SERVER_ERROR, e);
+            }
+          });
+
+          if (useNodes != null && useNodes.size() > 0) {
+            log.info("processCommit - send commit to replicas nodes={}",
+                useNodes);
+
+            params.set(DISTRIB_FROM, ZkCoreNodeProps
+                .getCoreUrl(zkController.getBaseUrl(), req.getCore().getName()));
+
+            List<SolrCmdDistributor.Node> finalUseNodes = useNodes;
+
+            worker.collect("distCommit", () -> {
+              cmdDistrib.distribCommit(cmd, finalUseNodes, params);
+            });
+          }
         } else {
           // zk
-          ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams()));
 
           List<SolrCmdDistributor.Node> useNodes = null;
           if (req.getParams().get(COMMIT_END_POINT) == null) {
@@ -236,20 +265,6 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
             params.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString());
             params.set(COMMIT_END_POINT, "leaders");
 
-            if (isLeader) {
-
-              worker.collect("localCommit", () -> {
-                log.info(
-                    "processCommit - Do a local commit on NRT endpoint for leader");
-                try {
-                  doLocalCommit(cmd);
-                } catch (IOException e) {
-                  log.error("Error on local commit");
-                  throw new SolrException(ErrorCode.SERVER_ERROR, e);
-                }
-              });
-            }
-
             if (useNodes != null && useNodes.size() > 0) {
               log.info("processCommit - send commit to leaders nodes={}",
                   useNodes);
@@ -262,34 +277,7 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
                 cmdDistrib.distribCommit(cmd, finalUseNodes1, params);
               });
             }
-          }
-          if (isLeader) {
-
-            params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString());
-
-            params.set(COMMIT_END_POINT, "replicas");
-
-            useNodes = getReplicaNodesForLeader(cloudDesc.getShardId(),
-                leaderReplica);
-
-            log.info(
-                "processCommit - Found the following replicas to send commit to {}",
-                useNodes);
-
-            if (useNodes != null && useNodes.size() > 0) {
-              log.info("processCommit - send commit to replicas nodes={}",
-                  useNodes);
-
-              params.set(DISTRIB_FROM, ZkCoreNodeProps
-                  .getCoreUrl(zkController.getBaseUrl(), req.getCore().getName()));
-
-              List<SolrCmdDistributor.Node> finalUseNodes = useNodes;
-              worker.collect("distCommit", () -> {
-                cmdDistrib.distribCommit(cmd, finalUseNodes, params);
-              });
-
-            }
-
+            return;
           }
 
         }
diff --git a/solr/core/src/test/org/apache/solr/cloud/MissingSegmentRecoveryTest.java b/solr/core/src/test/org/apache/solr/cloud/MissingSegmentRecoveryTest.java
index 5eea5d7..91fe063 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MissingSegmentRecoveryTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MissingSegmentRecoveryTest.java
@@ -41,7 +41,6 @@ import org.junit.Ignore;
 import org.junit.Test;
 
 @Slow
-@Ignore // nocommit flakey
 public class MissingSegmentRecoveryTest extends SolrCloudTestCase {
   final String collection = getClass().getSimpleName();
   
diff --git a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
index d6360aa..7d4f5e2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
@@ -39,7 +39,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @Slow
-@Ignore // nocommit debug
 public class RecoveryZkTest extends SolrCloudTestCase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
index 95ba0f8..7bd61fd 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
@@ -53,7 +53,7 @@ public class TestLeaderElectionZkExpiry extends SolrTestCaseJ4 {
           .setLeaderConflictResolveWait(5000)
           .setLeaderVoteWait(5000)
           .build();
-      final ZkController zkController = new ZkController(cc, server.getZkAddress(), 15000, cloudConfig, () -> Collections.emptyList());
+      final ZkController zkController = new ZkController(cc, server.getZkClient(), cloudConfig, () -> Collections.emptyList());
       try {
         Thread killer = new Thread() {
           @Override
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java b/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
index 4e3a6da..461f1fe 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
@@ -36,7 +36,6 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-@Ignore // nocommit flakey
 public class TestQueryingOnDownCollection extends SolrCloudTestCase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
index 129aae4..cfa5e34 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
@@ -72,7 +72,7 @@ import org.slf4j.LoggerFactory;
  * </p>
  *
  */
-@Ignore // nocommit debug
+@Ignore // nocommit
 public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -131,7 +131,7 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
     HashMap<String, String> urlMap = new HashMap<>();
     for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
       String jettyURL = jetty.getBaseUrl();
-      String nodeKey = jetty.getHost() + ":" + jetty.getLocalPort() + jetty.getBaseUrl().replace("/","_");
+      String nodeKey = jetty.getBaseUrl().replace("/","_");
       urlMap.put(nodeKey, jettyURL.toString());
     }
     ClusterState clusterState = zkStateReader.getClusterState();
@@ -140,7 +140,7 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
       Replica leader = slice.getLeader();
       assertNotNull("slice has null leader: " + slice.toString(), leader);
       assertNotNull("slice leader has null node name: " + slice.toString(), leader.getNodeName());
-      String leaderUrl = urlMap.remove(leader.getNodeName());
+      String leaderUrl = urlMap.remove("http:__" + leader.getNodeName());
       assertNotNull("could not find URL for " + shardName + " leader: " + leader.getNodeName(),
                     leaderUrl);
       assertEquals("expected two total replicas for: " + slice.getName(),
@@ -150,7 +150,7 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
       
       for (Replica replica : slice.getReplicas()) {
         if ( ! replica.equals(leader)) {
-          passiveUrl = urlMap.remove(replica.getNodeName());
+          passiveUrl = urlMap.remove("http:__" + replica.getNodeName());
           assertNotNull("could not find URL for " + shardName + " replica: " + replica.getNodeName(),
                         passiveUrl);
         }
@@ -183,8 +183,13 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
     assertEquals(0, CLOUD_CLIENT.add(doc(f("id", S_TWO_PRE + random().nextInt()),
                                          f("expected_shard_s", "shard2"))).getStatus());
     assertEquals(0, CLOUD_CLIENT.commit().getStatus());
+
+//    Thread.sleep(100);
+//    assertEquals(0, CLOUD_CLIENT.commit().getStatus());
+
     SolrDocumentList docs = CLOUD_CLIENT.query(params("q", "*:*",
                                                       "fl","id,expected_shard_s,[shard]")).getResults();
+
     assertEquals(2, docs.getNumFound());
     assertEquals(2, docs.size());
     for (SolrDocument doc : docs) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
index 4536abf..b741f97 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
@@ -170,7 +170,6 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
   }
 
   @Test
-  @Ignore // nocommit debug
   public void testReadConfigName() throws Exception {
     Path zkDir = createTempDir("zkData");
 
@@ -179,11 +178,11 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
     try {
       server.run();
 
-      SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
-      zkClient.start();
+      SolrZkClient zkClient = server.getZkClient();
       String actualConfigName = "firstConfig";
 
-      zkClient.mkdir(ZkConfigManager.CONFIGS_ZKNODE + "/" + actualConfigName);
+      zkClient.makePath(ZkConfigManager.CONFIGS_ZKNODE, false, false);
+      zkClient.makePath(ZkConfigManager.CONFIGS_ZKNODE + "/" + actualConfigName, false, false);
       
       Map<String,Object> props = new HashMap<>();
       props.put("configName", actualConfigName);
@@ -192,10 +191,9 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
               + COLLECTION_NAME, Utils.toJSON(zkProps),
           CreateMode.PERSISTENT, true);
 
-      zkClient.close();
-
       CloudConfig cloudConfig = new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build();
-      ZkController zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig, () -> null);
+      ZkController zkController = new ZkController(cc, zkClient, cloudConfig, () -> null);
+      zkController.start();
       try {
         String configName = zkController.getZkStateReader().readConfigName(COLLECTION_NAME);
         assertEquals(configName, actualConfigName);
@@ -223,7 +221,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
 
       try {
         CloudConfig cloudConfig = new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build();
-        zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig, () -> null);
+        zkController = new ZkController(cc, server.getZkClient(), cloudConfig, () -> null);
       } catch (IllegalArgumentException e) {
         fail("ZkController did not normalize host name correctly");
       } finally {
@@ -278,7 +276,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
 
       try {
         CloudConfig cloudConfig = new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build();
-        zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig, () -> null);
+        zkController = new ZkController(cc, server.getZkClient(), cloudConfig, () -> null);
         zkControllerRef.set(zkController);
 
         zkController.getZkClient().makePath(ZkStateReader.getCollectionPathRoot(collectionName), new byte[0], CreateMode.PERSISTENT, true);
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsTest.java
index 279f077..057b212 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsTest.java
@@ -39,6 +39,7 @@ import org.apache.solr.common.ParWork;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.util.TimeOut;
+import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
@@ -59,14 +60,8 @@ public class ZkShardTermsTest extends SolrCloudTestCase {
 
   @Test
   // commented out on: 17-Feb-2019   @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 15-Sep-2018
-  @Ignore // nocommit creating a collection that has exiting collection node is a fail
-  public void testParticipationOfReplicas() throws IOException, SolrServerException, InterruptedException {
+  public void testParticipationOfReplicas() throws Exception {
     String collection = "collection1";
-    try (ZkShardTerms zkShardTerms = new ZkShardTerms(collection, "shard2", cluster.getZkClient())) {
-      zkShardTerms.registerTerm("replica1");
-      zkShardTerms.registerTerm("replica2");
-      zkShardTerms.ensureTermsIsHigher("replica1", Collections.singleton("replica2"));
-    }
 
     // When new collection is created, the old term nodes will be removed
     CollectionAdminRequest.createCollection(collection, 2, 2)