Posted to commits@lucene.apache.org by ma...@apache.org on 2020/12/10 23:35:57 UTC

[lucene-solr] 02/02: @1239 Prep First Hand. Where is that 100-Type Guanyin Bodhisattva at?

This is an automated email from the ASF dual-hosted git repository.

markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit b74fac9e2ee0e0633ea718380a6d2d51d14aedde
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Thu Dec 10 17:30:26 2020 -0600

    @1239 Prep First Hand. Where is that 100-Type Guanyin Bodhisattva at?
    
    'Wanna live freely, why isn't it so easy?
    I should read a book, but I keep watching this TV
    And I know this lifestyle doesn't really feed me'
---
 gradle/testing/defaults-tests.gradle               |    4 +-
 .../apache/lucene/store/NativeFSLockFactory.java   |    2 +-
 .../org/apache/lucene/util/LuceneTestCase.java     |    8 +-
 solr/cloud-dev/cloud.sh                            |    4 +-
 .../stream/AnalyticsShardRequestManager.java       |    2 +-
 .../solr/analytics/ExpressionFactoryTest.java      |   13 +-
 .../function/field/AbstractAnalyticsFieldTest.java |   18 +
 .../legacy/LegacyAbstractAnalyticsTest.java        |    7 +-
 .../solr/analytics/legacy/LegacyNoFacetTest.java   |   18 +
 .../facet/LegacyAbstractAnalyticsFacetTest.java    |   14 +-
 .../legacy/facet/LegacyFieldFacetExtrasTest.java   |    9 +
 .../legacy/facet/LegacyFieldFacetTest.java         |   48 +
 .../legacy/facet/LegacyRangeFacetTest.java         |   14 +
 .../dataimport/TestHierarchicalDocBuilder.java     |    2 +-
 .../dataimport/TestJdbcDataSourceConvertType.java  |   14 +-
 .../handler/dataimport/TestScriptTransformer.java  |    2 +-
 .../extraction/ExtractingDocumentLoader.java       |    4 +-
 .../handler/extraction/ParseContextConfigTest.java |    8 +-
 .../org/apache/solr/ltr/TestLTROnSolrCloud.java    |    4 +-
 .../org/apache/solr/ltr/TestLTRQParserPlugin.java  |   20 +-
 .../apache/solr/ltr/TestLTRReRankingPipeline.java  |   22 +-
 .../org/apache/solr/ltr/TestLTRScoringQuery.java   |   49 +-
 .../test/org/apache/solr/ltr/TestRerankBase.java   |   66 +-
 .../solr/ltr/TestSelectiveWeightCreation.java      |   36 +-
 .../solr/ltr/feature/TestExternalFeatures.java     |   34 +-
 .../ltr/feature/TestExternalValueFeatures.java     |   30 +-
 .../TestFeatureExtractionFromMultipleSegments.java |   53 +-
 .../solr/ltr/feature/TestOriginalScoreFeature.java |    4 +-
 .../org/apache/solr/ltr/model/TestLinearModel.java |   15 +-
 .../solr/ltr/model/TestNeuralNetworkModel.java     |    2 +-
 .../apache/solr/ltr/norm/TestMinMaxNormalizer.java |   22 +-
 .../solr/ltr/norm/TestStandardNormalizer.java      |   21 +-
 .../solr/ltr/store/rest/TestModelManager.java      |   57 +-
 .../store/rest/TestModelManagerPersistence.java    |    2 +-
 .../collector/SchedulerMetricsCollector.java       |    2 +-
 .../solr/prometheus/exporter/SolrExporter.java     |    2 +-
 .../prometheus/scraper/SolrCloudScraperTest.java   |    2 +-
 .../scraper/SolrStandaloneScraperTest.java         |   28 +-
 solr/core/src/java/org/apache/solr/api/ApiBag.java |    4 +-
 .../client/solrj/embedded/EmbeddedSolrServer.java  |    2 +-
 .../client/solrj/embedded/JettySolrRunner.java     |  187 +-
 .../apache/solr/cloud/CloudConfigSetService.java   |    7 +-
 .../org/apache/solr/cloud/CloudDescriptor.java     |   11 -
 .../src/java/org/apache/solr/cloud/CloudUtil.java  |   21 +-
 .../org/apache/solr/cloud/ElectionContext.java     |    9 +-
 .../java/org/apache/solr/cloud/LeaderElector.java  |  395 ++--
 .../src/java/org/apache/solr/cloud/Overseer.java   |  255 ++-
 .../cloud/OverseerConfigSetMessageHandler.java     |   17 +-
 .../apache/solr/cloud/OverseerElectionContext.java |   51 +-
 .../apache/solr/cloud/OverseerMessageHandler.java  |    3 +-
 .../solr/cloud/OverseerTaskExecutorTask.java       |    7 +-
 .../apache/solr/cloud/OverseerTaskProcessor.java   |   51 +-
 .../org/apache/solr/cloud/OverseerTaskQueue.java   |  134 +-
 .../solr/cloud/RecoveringCoreTermWatcher.java      |   27 +-
 .../org/apache/solr/cloud/RecoveryStrategy.java    |  644 +++---
 .../solr/cloud/ShardLeaderElectionContext.java     |   88 +-
 .../solr/cloud/ShardLeaderElectionContextBase.java |   55 +-
 .../java/org/apache/solr/cloud/SolrZkServer.java   |    6 +-
 .../java/org/apache/solr/cloud/StatePublisher.java |   58 +-
 .../org/apache/solr/cloud/ZkCollectionTerms.java   |   43 +-
 .../java/org/apache/solr/cloud/ZkController.java   |  824 ++++----
 .../org/apache/solr/cloud/ZkDistributedQueue.java  |  249 +--
 .../java/org/apache/solr/cloud/ZkShardTerms.java   |   82 +-
 .../apache/solr/cloud/ZkSolrResourceLoader.java    |   20 +-
 .../solr/cloud/api/collections/AddReplicaCmd.java  |    6 +-
 .../solr/cloud/api/collections/AliasCmd.java       |   13 +-
 .../apache/solr/cloud/api/collections/Assign.java  |    2 +-
 .../cloud/api/collections/CreateCollectionCmd.java |  196 +-
 .../solr/cloud/api/collections/DeleteAliasCmd.java |    2 +-
 .../cloud/api/collections/DeleteCollectionCmd.java |    7 +-
 .../cloud/api/collections/DeleteReplicaCmd.java    |   10 +-
 .../solr/cloud/api/collections/MigrateCmd.java     |    3 +-
 .../solr/cloud/api/collections/MoveReplicaCmd.java |    6 +-
 .../OverseerCollectionMessageHandler.java          |   77 +-
 .../solr/cloud/api/collections/RenameCmd.java      |    2 +-
 .../solr/cloud/api/collections/ReplaceNodeCmd.java |    4 +-
 .../solr/cloud/api/collections/RestoreCmd.java     |    4 +-
 .../solr/cloud/api/collections/SplitShardCmd.java  |  182 +-
 .../solr/cloud/overseer/ClusterStateMutator.java   |    2 +-
 .../solr/cloud/overseer/CollectionMutator.java     |   14 +-
 .../apache/solr/cloud/overseer/ZkStateWriter.java  |  284 +--
 .../java/org/apache/solr/core/BlobRepository.java  |    2 +-
 .../apache/solr/core/CachingDirectoryFactory.java  |    4 +-
 .../org/apache/solr/core/ConfigSetService.java     |   10 +-
 .../java/org/apache/solr/core/CoreContainer.java   |  688 ++++---
 .../src/java/org/apache/solr/core/CoreSorter.java  |    4 +-
 .../src/java/org/apache/solr/core/PluginBag.java   |    4 +-
 .../java/org/apache/solr/core/RequestParams.java   |    2 +-
 .../src/java/org/apache/solr/core/SolrConfig.java  |   95 +-
 .../src/java/org/apache/solr/core/SolrCore.java    |  609 ++++--
 .../src/java/org/apache/solr/core/SolrCores.java   |   37 +-
 .../org/apache/solr/core/SolrResourceLoader.java   |  172 ++
 .../java/org/apache/solr/core/SolrXmlConfig.java   |  197 +-
 .../apache/solr/core/StandardDirectoryFactory.java |   13 +-
 .../java/org/apache/solr/core/XmlConfigFile.java   |  107 +-
 .../src/java/org/apache/solr/core/ZkContainer.java |    6 +-
 .../org/apache/solr/core/backup/BackupManager.java |    2 +-
 .../apache/solr/filestore/DistribPackageStore.java |    2 +-
 .../org/apache/solr/filestore/PackageStoreAPI.java |    8 +-
 .../java/org/apache/solr/handler/BlobHandler.java  |   78 +-
 .../solr/handler/ContentStreamHandlerBase.java     |    7 +
 .../java/org/apache/solr/handler/IndexFetcher.java |   29 +-
 .../apache/solr/handler/ReplicationHandler.java    |   26 +-
 .../apache/solr/handler/RequestHandlerBase.java    |   29 +-
 .../org/apache/solr/handler/SchemaHandler.java     |    3 +-
 .../org/apache/solr/handler/SolrConfigHandler.java |    2 +-
 .../solr/handler/admin/AdminHandlersProxy.java     |    2 +-
 .../org/apache/solr/handler/admin/ColStatus.java   |    4 +-
 .../solr/handler/admin/CollectionHandlerApi.java   |    2 +-
 .../solr/handler/admin/CollectionsHandler.java     |   94 +-
 .../solr/handler/admin/ConfigSetsHandler.java      |   38 +-
 .../solr/handler/admin/CoreAdminOperation.java     |   10 +-
 .../solr/handler/admin/DeleteSnapshotOp.java       |   11 +-
 .../solr/handler/admin/HealthCheckHandler.java     |    4 +-
 .../apache/solr/handler/admin/MergeIndexesOp.java  |   98 +-
 .../solr/handler/admin/MetricsHistoryHandler.java  |    6 +-
 .../apache/solr/handler/admin/PrepRecoveryOp.java  |   16 +-
 .../solr/handler/admin/RebalanceLeaders.java       |    4 +-
 .../solr/handler/admin/ZookeeperInfoHandler.java   |    7 +-
 .../solr/handler/component/CloudReplicaSource.java |   99 +-
 .../solr/handler/component/HttpShardHandler.java   |   50 +-
 .../handler/component/HttpShardHandlerFactory.java |   17 +-
 .../handler/component/QueryElevationComponent.java |   12 +-
 .../solr/handler/component/ReplicaSource.java      |    2 +-
 .../solr/handler/component/SearchHandler.java      |    3 +-
 .../solr/handler/component/ShardRequest.java       |    3 +-
 .../handler/component/StandaloneReplicaSource.java |    5 +-
 .../solr/handler/component/TermsComponent.java     |    8 +-
 .../apache/solr/handler/loader/JavabinLoader.java  |   13 +-
 .../org/apache/solr/handler/loader/XMLLoader.java  |    7 +-
 .../apache/solr/request/LocalSolrQueryRequest.java |    6 +
 .../apache/solr/response/RawResponseWriter.java    |    9 +-
 .../apache/solr/rest/ManagedResourceStorage.java   |    8 +-
 .../solr/rest/schema/FieldTypeXmlAdapter.java      |   55 +-
 .../org/apache/solr/schema/AbstractEnumField.java  |    3 +-
 .../apache/solr/schema/FieldTypePluginLoader.java  |   63 +-
 .../solr/schema/FileExchangeRateProvider.java      |    4 +-
 .../java/org/apache/solr/schema/IndexSchema.java   |  132 +-
 .../org/apache/solr/schema/ManagedIndexSchema.java |  100 +-
 .../solr/schema/ManagedIndexSchemaFactory.java     |   36 +-
 .../java/org/apache/solr/schema/SchemaManager.java |    2 +-
 .../apache/solr/schema/ZkIndexSchemaReader.java    |  163 +-
 .../java/org/apache/solr/search/CacheConfig.java   |    5 +-
 .../org/apache/solr/search/SolrCoreParser.java     |   58 +-
 .../org/apache/solr/search/facet/FacetMerger.java  |    2 +-
 .../org/apache/solr/search/join/XCJFQParser.java   |    3 +-
 .../apache/solr/search/join/XCJFQParserPlugin.java |    7 +-
 .../solr/security/PKIAuthenticationPlugin.java     |   11 +-
 .../java/org/apache/solr/servlet/HttpSolrCall.java |  170 +-
 .../apache/solr/servlet/SolrDispatchFilter.java    |  116 +-
 .../apache/solr/servlet/SolrLifcycleListener.java  |   34 +-
 .../apache/solr/servlet/SolrRequestParsers.java    |   15 +-
 .../apache/solr/servlet/SolrShutdownHandler.java   |    6 +-
 .../java/org/apache/solr/update/CommitTracker.java |   14 +-
 .../apache/solr/update/DefaultSolrCoreState.java   |  107 +-
 .../src/java/org/apache/solr/update/PeerSync.java  |   29 +-
 .../org/apache/solr/update/PeerSyncWithLeader.java |   11 +-
 .../org/apache/solr/update/SolrCmdDistributor.java |   53 +-
 .../java/org/apache/solr/update/SolrCoreState.java |   12 +-
 .../org/apache/solr/update/SolrIndexConfig.java    |   85 +-
 .../src/java/org/apache/solr/update/UpdateLog.java |    8 +-
 .../org/apache/solr/update/UpdateShardHandler.java |    2 +-
 .../java/org/apache/solr/update/VersionInfo.java   |   36 +-
 .../AddSchemaFieldsUpdateProcessorFactory.java     |   78 +-
 .../processor/DistributedUpdateProcessor.java      |  230 ++-
 .../processor/DistributedZkUpdateProcessor.java    |  103 +-
 .../update/processor/TolerantUpdateProcessor.java  |    6 +-
 .../processor/UpdateRequestProcessorChain.java     |    5 +
 .../src/java/org/apache/solr/util/ExportTool.java  |   39 +-
 .../java/org/apache/solr/util/SimplePostTool.java  |   31 +-
 .../src/java/org/apache/solr/util/SolrCLI.java     |   12 +-
 .../org/apache/solr/util/StartupLoggingUtils.java  |    2 +-
 .../solr/util/plugin/AbstractPluginLoader.java     |    5 +-
 solr/core/src/resources/ShortClassNames.properties |    1 +
 solr/core/src/test-files/log4j2.xml                |    6 +-
 .../solr/collection1/conf/solrconfig-elevate.xml   |   28 +-
 .../solr/configsets/xcjf/conf/solrconfig.xml       |    6 +
 .../apache/solr/AnalysisAfterCoreReloadTest.java   |    1 +
 .../src/test/org/apache/solr/CursorPagingTest.java |    1 +
 .../src/test/org/apache/solr/EchoParamsTest.java   |   11 +-
 .../test/org/apache/solr/MinimalSchemaTest.java    |   11 +-
 .../test/org/apache/solr/SolrTestCaseJ4Test.java   |   12 +-
 .../test/org/apache/solr/TestCrossCoreJoin.java    |   17 +-
 solr/core/src/test/org/apache/solr/TestJoin.java   |   28 +-
 .../test/org/apache/solr/TestRandomDVFaceting.java |   13 +-
 .../test/org/apache/solr/TestTolerantSearch.java   |   25 +-
 .../analysis/ProtectedTermFilterFactoryTest.java   |   13 +-
 .../analysis/TestWordDelimiterFilterFactory.java   |   87 +-
 .../solr/backcompat/TestLuceneIndexBackCompat.java |    4 +-
 .../TestEmbeddedSolrServerAdminHandler.java        |    2 +
 .../TestEmbeddedSolrServerConstructors.java        |    2 +
 .../embedded/TestEmbeddedSolrServerSchemaAPI.java  |    2 +
 .../client/solrj/embedded/TestJettySolrRunner.java |    4 +-
 .../test/org/apache/solr/cloud/AddReplicaTest.java |   37 +-
 .../solr/cloud/ChaosMonkeyNothingIsSafeTest.java   |    2 +-
 ...aosMonkeyNothingIsSafeWithPullReplicasTest.java |    2 +-
 .../solr/cloud/ChaosMonkeySafeLeaderTest.java      |    3 +-
 .../solr/cloud/ChaosMonkeyShardSplitTest.java      |    9 +-
 .../org/apache/solr/cloud/CleanupOldIndexTest.java |    2 -
 .../cloud/CloudExitableDirectoryReaderTest.java    |    8 +-
 .../apache/solr/cloud/ClusterStateMockUtil.java    |    4 +-
 .../solr/cloud/ClusterStateMockUtilTest.java       |    2 +-
 .../org/apache/solr/cloud/ClusterStateTest.java    |   20 +-
 .../apache/solr/cloud/ClusterStateUpdateTest.java  |   10 +-
 .../org/apache/solr/cloud/CollectionPropsTest.java |    1 +
 .../apache/solr/cloud/CollectionsAPISolrJTest.java |   26 +-
 .../apache/solr/cloud/ConnectionManagerTest.java   |  108 -
 .../solr/cloud/CreateCollectionCleanupTest.java    |    5 +-
 .../solr/cloud/DeleteInactiveReplicaTest.java      |   27 +-
 .../cloud/DeleteLastCustomShardedReplicaTest.java  |    5 +-
 .../test/org/apache/solr/cloud/DeleteNodeTest.java |    3 +-
 .../org/apache/solr/cloud/DeleteReplicaTest.java   |   13 +-
 .../org/apache/solr/cloud/DeleteShardTest.java     |    2 -
 .../org/apache/solr/cloud/DeleteStatusTest.java    |    2 +
 .../apache/solr/cloud/DistributedQueueTest.java    |  324 ---
 .../apache/solr/cloud/DocValuesNotIndexedTest.java |   48 +-
 .../org/apache/solr/cloud/ForceLeaderTest.java     |    2 +-
 .../solr/cloud/FullSolrCloudDistribCmdsTest.java   |   11 +-
 .../org/apache/solr/cloud/LeaderElectionTest.java  |   18 +-
 .../solr/cloud/LeaderVoteWaitTimeoutTest.java      |   41 +-
 .../solr/cloud/MetricsHistoryIntegrationTest.java  |    5 +-
 .../MetricsHistoryWithAuthIntegrationTest.java     |    2 +-
 .../org/apache/solr/cloud/MoveReplicaTest.java     |    4 +-
 .../solr/cloud/MultiSolrCloudTestCaseTest.java     |   31 +-
 .../solr/cloud/NestedShardedAtomicUpdateTest.java  |   23 +-
 .../org/apache/solr/cloud/NodeMutatorTest.java     |    3 +-
 .../OutOfBoxZkACLAndCredentialsProvidersTest.java  |    9 +-
 .../solr/cloud/OverseerModifyCollectionTest.java   |    1 +
 .../apache/solr/cloud/OverseerTaskQueueTest.java   |   94 -
 .../test/org/apache/solr/cloud/OverseerTest.java   |   32 +-
 .../test/org/apache/solr/cloud/RecoveryZkTest.java |    3 +-
 .../org/apache/solr/cloud/ReplaceNodeTest.java     |    7 +-
 .../org/apache/solr/cloud/RollingRestartTest.java  |    4 +-
 .../apache/solr/cloud/ShardRoutingCustomTest.java  |    4 +-
 .../org/apache/solr/cloud/ShardRoutingTest.java    |  125 +-
 .../cloud/SharedFSAutoReplicaFailoverTest.java     |    6 +-
 .../apache/solr/cloud/SolrCloudBridgeTestCase.java |   89 +-
 .../apache/solr/cloud/SolrCloudExampleTest.java    |    4 +-
 .../org/apache/solr/cloud/SolrXmlInZkTest.java     |    7 -
 .../test/org/apache/solr/cloud/SplitShardTest.java |    9 +-
 .../solr/cloud/TestAuthenticationFramework.java    |    1 +
 .../apache/solr/cloud/TestCloudDeleteByQuery.java  |   20 +-
 .../TestCloudPhrasesIdentificationComponent.java   |    2 +
 .../org/apache/solr/cloud/TestCloudPivotFacet.java |    8 +
 .../org/apache/solr/cloud/TestCloudRecovery.java   |    2 +-
 .../org/apache/solr/cloud/TestCloudRecovery2.java  |   26 +-
 .../org/apache/solr/cloud/TestConfigSetsAPI.java   |   91 +-
 .../solr/cloud/TestConfigSetsAPIZkFailure.java     |   13 +-
 .../cloud/TestDeleteCollectionOnDownNodes.java     |   21 +-
 .../cloud/TestDynamicFieldNamesIndexCorrectly.java |    1 +
 .../solr/cloud/TestLocalStatsCacheCloud.java       |    2 +
 .../test/org/apache/solr/cloud/TestLockTree.java   |   22 +-
 .../solr/cloud/TestMiniSolrCloudClusterSSL.java    |    2 +-
 .../solr/cloud/TestOnReconnectListenerSupport.java |   11 +-
 .../org/apache/solr/cloud/TestPrepRecovery.java    |   11 +-
 .../org/apache/solr/cloud/TestPullReplica.java     |    6 +-
 .../apache/solr/cloud/TestRebalanceLeaders.java    |    8 +-
 .../apache/solr/cloud/TestRequestForwarding.java   |    6 +
 .../org/apache/solr/cloud/TestSegmentSorting.java  |    4 +-
 .../solr/cloud/TestSizeLimitedDistributedMap.java  |    6 +
 .../solr/cloud/TestSkipOverseerOperations.java     |    4 +-
 .../cloud/TestSolrCloudWithDelegationTokens.java   |    1 +
 .../solr/cloud/TestSolrCloudWithKerberosAlt.java   |    1 -
 .../cloud/TestStressCloudBlindAtomicUpdates.java   |   61 +-
 .../org/apache/solr/cloud/TestStressLiveNodes.java |    2 +-
 .../org/apache/solr/cloud/TestTlogReplica.java     |    4 +-
 .../cloud/TestTolerantUpdateProcessorCloud.java    |   62 +-
 .../TestTolerantUpdateProcessorRandomCloud.java    |   17 +-
 .../apache/solr/cloud/UnloadDistributedZkTest.java |   30 +-
 .../VMParamsZkACLAndCredentialsProvidersTest.java  |    2 +-
 .../org/apache/solr/cloud/ZkControllerTest.java    |    4 +-
 .../test/org/apache/solr/cloud/ZkFailoverTest.java |    2 +-
 .../org/apache/solr/cloud/ZkShardTermsTest.java    |   12 +-
 .../org/apache/solr/cloud/ZkSolrClientTest.java    |    6 +-
 .../CollectionsAPIAsyncDistributedZkTest.java      |    6 +-
 .../CollectionsAPIDistClusterPerZkTest.java        |    5 +-
 .../CollectionsAPIDistributedZkTest.java           |    5 +-
 .../CreateCollectionsIndexAndRestartTest.java      |   14 +-
 .../solr/cloud/api/collections/ShardSplitTest.java |    9 +-
 .../SimpleCollectionCreateDeleteTest.java          |   25 +-
 .../TestRequestStatusCollectionAPI.java            |    2 +
 .../cloud/overseer/TestClusterStateMutator.java    |    4 +-
 .../overseer/ZkCollectionPropsCachingTest.java     |    1 -
 .../apache/solr/core/BlobRepositoryCloudTest.java  |    2 +
 .../solr/core/ByteBuffersDirectoryFactoryTest.java |   20 +-
 .../solr/core/ConfigureRecoveryStrategyTest.java   |    6 +
 .../test/org/apache/solr/core/CoreSorterTest.java  |   21 +-
 .../org/apache/solr/core/DirectoryFactoryTest.java |    2 +-
 .../solr/core/ExitableDirectoryReaderTest.java     |    8 +
 .../test/org/apache/solr/core/PluginInfoTest.java  |  282 ++-
 .../org/apache/solr/core/RequestHandlersTest.java  |    4 +-
 .../solr/core/SolrCoreCheckLockOnStartupTest.java  |    2 +
 .../test/org/apache/solr/core/SolrCoreTest.java    |    3 +
 .../solr/core/TestBackupRepositoryFactory.java     |    6 +-
 .../test/org/apache/solr/core/TestBadConfig.java   |   10 +-
 .../org/apache/solr/core/TestCodecSupport.java     |   32 +-
 .../src/test/org/apache/solr/core/TestConfig.java  |   62 +-
 .../apache/solr/core/TestConfigSetImmutable.java   |    2 +
 .../apache/solr/core/TestConfigSetProperties.java  |   13 +-
 .../test/org/apache/solr/core/TestConfigSets.java  |   27 +-
 .../org/apache/solr/core/TestCoreContainer.java    |   99 +-
 .../org/apache/solr/core/TestCoreDiscovery.java    |    6 +-
 .../apache/solr/core/TestCorePropertiesReload.java |   11 +-
 .../org/apache/solr/core/TestCustomStream.java     |    1 +
 .../solr/core/TestImplicitCoreProperties.java      |    2 +
 .../org/apache/solr/core/TestJmxIntegration.java   |    7 +-
 .../test/org/apache/solr/core/TestLazyCores.java   |    4 +-
 .../src/test/org/apache/solr/core/TestNRTOpen.java |   12 +-
 .../apache/solr/core/TestReloadAndDeleteDocs.java  |   10 +-
 .../apache/solr/core/TestShardHandlerFactory.java  |    8 +-
 .../apache/solr/core/TestSolrConfigHandler.java    |    5 +-
 .../src/test/org/apache/solr/core/TestSolrXml.java |   83 +-
 .../org/apache/solr/handler/JsonLoaderTest.java    |    2 +-
 .../org/apache/solr/handler/TestConfigReload.java  |    2 +-
 .../org/apache/solr/handler/TestReqParamsAPI.java  |    4 +-
 .../solr/handler/TestSQLHandlerNonCloud.java       |   20 +-
 .../solr/handler/TestSolrConfigHandlerCloud.java   |    1 +
 .../handler/TestSolrConfigHandlerConcurrent.java   |   16 +-
 .../solr/handler/XmlUpdateRequestHandlerTest.java  |   19 +-
 .../solr/handler/admin/AdminHandlersProxyTest.java |    1 +
 .../handler/admin/CoreAdminCreateDiscoverTest.java |   13 +-
 .../solr/handler/admin/CoreAdminHandlerTest.java   |    9 +-
 .../admin/CoreMergeIndexesAdminHandlerTest.java    |    8 -
 .../solr/handler/admin/DaemonStreamApiTest.java    |   67 +-
 .../solr/handler/admin/LukeRequestHandlerTest.java |   12 +-
 .../admin/SegmentsInfoRequestHandlerTest.java      |    2 +-
 .../handler/admin/ShowFileRequestHandlerTest.java  |   11 +-
 .../solr/handler/admin/StatsReloadRaceTest.java    |    2 +
 .../solr/handler/component/BadComponentTest.java   |    1 +
 .../handler/component/CloudReplicaSourceTest.java  |    1 -
 .../component/CustomHighlightComponentTest.java    |    2 +
 .../component/DistributedDebugComponentTest.java   |   36 +-
 .../handler/component/InfixSuggestersTest.java     |    1 +
 .../component/QueryElevationComponentTest.java     |    8 -
 .../handler/component/ShardsWhitelistTest.java     |   46 +-
 .../SuggestComponentContextFilterQueryTest.java    |   17 +-
 .../handler/component/TermVectorComponentTest.java |    7 -
 .../component/TestHttpShardHandlerFactory.java     |   40 +-
 .../apache/solr/handler/tagger/TaggerTestCase.java |    6 +
 .../solr/handler/tagger/TaggingAttributeTest.java  |   12 +-
 .../solr/handler/tagger/XmlInterpolationTest.java  |   23 +-
 .../solr/highlight/FastVectorHighlighterTest.java  |   13 +-
 .../solr/highlight/HighlighterMaxOffsetTest.java   |    4 +-
 .../org/apache/solr/highlight/HighlighterTest.java |    2 +-
 .../highlight/TestPostingsSolrHighlighter.java     |   18 +-
 .../solr/index/WrapperMergePolicyFactoryTest.java  |   16 +-
 .../org/apache/solr/metrics/JvmMetricsTest.java    |   18 +-
 .../org/apache/solr/metrics/MetricsConfigTest.java |   17 +-
 .../solr/metrics/SolrCoreMetricManagerTest.java    |   13 +-
 .../apache/solr/metrics/SolrMetricManagerTest.java |  107 +-
 .../solr/metrics/SolrMetricsIntegrationTest.java   |    8 +-
 .../reporters/SolrGraphiteReporterTest.java        |    2 +-
 .../metrics/reporters/SolrJmxReporterTest.java     |    2 +
 .../metrics/reporters/SolrSlf4jReporterTest.java   |    2 +-
 .../reporters/solr/SolrShardReporterTest.java      |    2 +-
 .../apache/solr/request/TestRemoteStreaming.java   |   22 +-
 .../org/apache/solr/request/TestStreamBody.java    |    6 +-
 .../solr/request/TestUnInvertedFieldException.java |    7 +-
 .../solr/response/TestBinaryResponseWriter.java    |   11 +-
 .../solr/response/TestCSVResponseWriter.java       |    2 +
 .../solr/response/TestGeoJSONResponseWriter.java   |   13 +-
 .../solr/response/TestGraphMLResponseWriter.java   |    4 +-
 .../solr/response/TestRetrieveFieldsOptimizer.java |   97 +-
 .../TestChildDocTransformerHierarchy.java          |    2 +
 .../transform/TestExplainDocTransformer.java       |    9 +-
 .../org/apache/solr/rest/SolrRestletTestBase.java  |   16 +-
 .../org/apache/solr/rest/TestManagedResource.java  |  108 +-
 .../solr/rest/TestManagedResourceStorage.java      |    6 +
 .../test/org/apache/solr/rest/TestRestManager.java |    3 +-
 .../apache/solr/rest/schema/TestFieldResource.java |   13 +
 .../schema/TestFieldTypeCollectionResource.java    |   12 +
 .../solr/rest/schema/TestSchemaNameResource.java   |    1 +
 .../schema/TestSerializedLuceneMatchVersion.java   |    8 +-
 .../rest/schema/TestUniqueKeyFieldResource.java    |   13 +
 .../analysis/TestManagedStopFilterFactory.java     |   30 +-
 .../analysis/TestManagedSynonymFilterFactory.java  |   19 +-
 .../TestManagedSynonymGraphFilterFactory.java      |    8 +-
 .../org/apache/solr/schema/BadIndexSchemaTest.java |    2 +
 .../org/apache/solr/schema/BooleanFieldTest.java   |   11 +-
 .../test/org/apache/solr/schema/DateFieldTest.java |   18 +-
 .../solr/schema/ExternalFileFieldSortTest.java     |    4 +
 .../solr/schema/IndexSchemaRuntimeFieldTest.java   |   11 +-
 .../schema/ManagedSchemaRoundRobinCloudTest.java   |    2 +
 .../org/apache/solr/schema/NumericFieldsTest.java  |    9 +-
 .../schema/OpenExchangeRatesOrgProviderTest.java   |   11 +-
 .../PreAnalyzedFieldManagedSchemaCloudTest.java    |    2 +
 .../apache/solr/schema/PreAnalyzedFieldTest.java   |   14 +-
 .../apache/solr/schema/PrimitiveFieldTypeTest.java |    5 +-
 .../org/apache/solr/schema/SchemaWatcherTest.java  |   12 +-
 .../apache/solr/schema/SynonymTokenizerTest.java   |    2 +-
 .../org/apache/solr/schema/TestBinaryField.java    |    4 +-
 .../apache/solr/schema/TestCloudSchemaless.java    |    4 +-
 .../solr/schema/TestHalfAndHalfDocValues.java      |   13 +-
 .../apache/solr/schema/TestManagedSchemaAPI.java   |    1 +
 .../solr/schema/TestUseDocValuesAsStored.java      |    8 +-
 .../solr/schema/TestUseDocValuesAsStored2.java     |   34 +-
 .../org/apache/solr/search/MergeStrategyTest.java  |   16 +-
 .../org/apache/solr/search/SpatialFilterTest.java  |   18 +-
 .../apache/solr/search/TestHashQParserPlugin.java  |   20 +-
 .../org/apache/solr/search/TestIndexSearcher.java  |    5 +-
 .../org/apache/solr/search/TestMissingGroups.java  |    8 +-
 .../solr/search/TestPayloadScoreQParserPlugin.java |   13 +-
 .../test/org/apache/solr/search/TestReload.java    |    2 +
 .../org/apache/solr/search/TestSolr4Spatial2.java  |    3 +
 .../solr/search/TestSurroundQueryParser.java       |   10 +-
 .../search/facet/TestCloudJSONFacetJoinDomain.java |    1 +
 .../solr/search/facet/TestCloudJSONFacetSKG.java   |    4 +
 .../search/facet/TestCloudJSONFacetSKGEquiv.java   |    4 +-
 .../facet/TestJsonFacetsWithNestedObjects.java     |   12 +-
 .../solr/search/function/SortByFunctionTest.java   |   13 +-
 .../function/distance/DistanceFunctionTest.java    |    2 +
 .../org/apache/solr/search/join/BJQParserTest.java |    5 +-
 .../org/apache/solr/search/join/XCJFQueryTest.java |   11 +-
 .../similarities/TestIBSimilarityFactory.java      |   14 +-
 .../TestPerFieldSimilarityWithDefaultOverride.java |   16 +-
 .../TestSweetSpotSimilarityFactory.java            |   12 +-
 .../apache/solr/search/stats/TestDistribIDF.java   |   11 +-
 .../solr/security/BasicAuthIntegrationTest.java    |    5 +-
 .../solr/security/BasicAuthOnSingleNodeTest.java   |    1 -
 .../solr/security/BasicAuthStandaloneTest.java     |    2 +-
 .../security/JWTAuthPluginIntegrationTest.java     |    7 +-
 .../apache/solr/security/JWTAuthPluginTest.java    |    1 +
 .../security/PKIAuthenticationIntegrationTest.java |    1 +
 .../hadoop/TestDelegationWithHadoopAuth.java       |    1 +
 .../hadoop/TestImpersonationWithHadoopAuth.java    |    1 +
 .../hadoop/TestSolrCloudWithHadoopAuthPlugin.java  |    3 +-
 .../solr/spelling/IndexBasedSpellCheckerTest.java  |   21 +-
 .../SpellCheckCollatorWithCollapseTest.java        |   15 +-
 .../spelling/WordBreakSolrSpellCheckerTest.java    |   12 +-
 .../org/apache/solr/update/AddBlockUpdateTest.java |    1 +
 .../apache/solr/update/SolrCmdDistributorTest.java |    2 +-
 .../apache/solr/update/SolrIndexConfigTest.java    |  254 +--
 .../test/org/apache/solr/update/UpdateLogTest.java |   15 +-
 .../org/apache/solr/update/VersionInfoTest.java    |    4 +
 .../processor/DistributedUpdateProcessorTest.java  |    1 +
 .../update/processor/NestedAtomicUpdateTest.java   |   12 +-
 .../ParsingFieldUpdateProcessorsTest.java          |   14 +-
 .../processor/TolerantUpdateProcessorTest.java     |   16 +-
 .../processor/UUIDUpdateProcessorFallbackTest.java |   12 +-
 .../test/org/apache/solr/util/AuthToolTest.java    |    1 +
 .../src/test/org/apache/solr/util/DOMUtilTest.java |   67 +-
 .../org/apache/solr/util/OrderedExecutorTest.java  |    4 +-
 .../org/apache/solr/util/SimplePostToolTest.java   |    4 +-
 .../test/org/apache/solr/util/TestExportTool.java  |    1 -
 .../org/apache/solr/util/TestSystemIdResolver.java |    9 +-
 solr/server/etc/jetty-http.xml                     |    3 +-
 solr/server/etc/jetty.xml                          |    4 +-
 solr/server/resources/log4j2.xml                   |    2 +-
 solr/solrj/build.gradle                            |    4 +
 .../solr/client/solrj/cloud/DistributedLock.java   |    2 +-
 .../solr/client/solrj/cloud/DistributedQueue.java  |   20 -
 .../apache/solr/client/solrj/cloud/ShardTerms.java |   26 +-
 .../client/solrj/impl/BaseCloudSolrClient.java     |    7 +-
 .../solrj/impl/BaseHttpClusterStateProvider.java   |   40 +-
 .../impl/ConcurrentUpdateHttp2SolrClient.java      |    1 -
 .../solrj/impl/ConcurrentUpdateSolrClient.java     |   10 +-
 .../solr/client/solrj/impl/Http2SolrClient.java    |  291 ++-
 .../solr/client/solrj/impl/HttpSolrClient.java     |    2 +-
 .../solr/client/solrj/impl/LBHttp2SolrClient.java  |    1 +
 .../solr/client/solrj/impl/LBSolrClient.java       |   17 +-
 .../solrj/impl/SolrClientNodeStateProvider.java    |   23 +-
 .../solrj/impl/ZkClientClusterStateProvider.java   |    7 +-
 .../client/solrj/impl/ZkDistribStateManager.java   |    2 +-
 .../solr/client/solrj/io/SolrClientCache.java      |    8 +-
 .../client/solrj/io/sql/DatabaseMetaDataImpl.java  |    2 +-
 .../solrj/io/stream/FeaturesSelectionStream.java   |    3 +-
 .../client/solrj/io/stream/TextLogitStream.java    |    4 +-
 .../solr/client/solrj/io/stream/TopicStream.java   |   32 +-
 .../solr/client/solrj/io/stream/TupleStream.java   |    3 +-
 .../client/solrj/io/stream/expr/StreamFactory.java |    2 +-
 .../solrj/request/ConfigSetAdminRequest.java       |    6 +-
 .../solrj/request/ContentStreamUpdateRequest.java  |    9 +-
 .../solrj/request/DocumentAnalysisRequest.java     |    7 +-
 .../solr/client/solrj/request/RequestWriter.java   |   41 +-
 .../solr/client/solrj/request/UpdateRequest.java   |    1 -
 .../client/solrj/util/SolrBasicAuthentication.java |    4 +-
 .../src/java/org/apache/solr/common/ParWork.java   |   52 +-
 .../org/apache/solr/common/ParWorkExecutor.java    |   21 +-
 .../apache/solr/common/PerThreadExecService.java   |    6 +-
 .../java/org/apache/solr/common/SolrException.java |    2 +-
 .../apache/solr/common/ToleratedUpdateError.java   |    3 +-
 .../org/apache/solr/common/cloud/ClusterState.java |   71 +-
 .../apache/solr/common/cloud/ClusterStateUtil.java |    6 +-
 .../solr/common/cloud/ConnectionManager.java       |   48 +-
 .../apache/solr/common/cloud/DocCollection.java    |   41 +-
 .../solr/common/cloud/LiveNodesListener.java       |    3 +-
 .../solr/common/cloud/LiveNodesPredicate.java      |    2 +-
 .../solr/common/cloud/NodesSysPropsCacher.java     |    4 +-
 .../org/apache/solr/common/cloud/OnReconnect.java  |    1 +
 .../java/org/apache/solr/common/cloud/Replica.java |    2 +-
 .../org/apache/solr/common/cloud/SolrZkClient.java |  111 +-
 .../apache/solr/common/cloud/SolrZooKeeper.java    |   38 +-
 .../apache/solr/common/cloud/ZkCmdExecutor.java    |    2 +-
 .../apache/solr/common/cloud/ZkStateReader.java    |  732 ++++---
 .../apache/solr/common/util/ContentStreamBase.java |    9 +-
 .../org/apache/solr/common/util/ExecutorUtil.java  |   12 +-
 .../solr/common/util/ObjectReleaseTracker.java     |   35 +-
 .../solr/common/util/SolrQueuedThreadPool.java     | 1579 +++++++--------
 .../util/SolrScheduledExecutorScheduler.java       |    8 +-
 .../java/org/apache/solr/common/util/Utils.java    |   45 +-
 .../apache/solr/common/util/ValidatingJsonMap.java |    2 +-
 .../org/apache/zookeeper/ZooKeeperExposed.java     |   19 +-
 .../UsingSolrJRefGuideExamplesTest.java            |    6 +-
 .../ref_guide_examples/ZkConfigFilesTest.java      |    9 +-
 .../client/solrj/MergeIndexesExampleTestBase.java  |    2 +-
 .../client/solrj/SolrExampleBinaryHttp2Test.java   |    2 +-
 .../apache/solr/client/solrj/SolrExampleTests.java |  144 +-
 .../solr/client/solrj/SolrExampleTestsBase.java    |   98 +-
 .../client/solrj/SolrSchemalessExampleTest.java    |   30 +-
 .../apache/solr/client/solrj/TestBatchUpdate.java  |   15 +-
 .../solr/client/solrj/TestLBHttpSolrClient.java    |   36 +-
 .../solr/client/solrj/TestSolrJErrorHandling.java  |   49 +-
 .../AbstractEmbeddedSolrServerTestCase.java        |   25 +-
 .../client/solrj/embedded/JettyWebappTest.java     |    6 +-
 .../solrj/embedded/SolrExampleEmbeddedTest.java    |    4 -
 .../solrj/embedded/SolrExampleStreamingTest.java   |    7 -
 .../solrj/embedded/SolrExampleXMLHttp2Test.java    |    8 +-
 .../solrj/embedded/TestEmbeddedSolrServer.java     |   18 +-
 .../client/solrj/embedded/TestSolrProperties.java  |    8 +-
 .../solrj/impl/BaseSolrClientWireMockTest.java     |   13 +-
 .../client/solrj/impl/BasicHttpSolrClientTest.java |    9 +-
 .../solrj/impl/CloudHttp2SolrClientTest.java       |    2 +-
 .../solrj/impl/CloudSolrClientCacheTest.java       |    2 +-
 .../client/solrj/impl/CloudSolrClientTest.java     |    2 +-
 ...oncurrentUpdateHttp2SolrClientBadInputTest.java |    8 +
 .../ConcurrentUpdateSolrClientBadInputTest.java    |    7 +
 .../solrj/impl/ConcurrentUpdateSolrClientTest.java |   43 +-
 .../impl/Http2SolrClientCompatibilityTest.java     |    6 -
 .../solrj/impl/HttpSolrClientBadInputTest.java     |   15 +-
 .../solrj/impl/HttpSolrClientConPoolTest.java      |   34 +-
 .../impl/HttpSolrClientSSLAuthConPoolTest.java     |   24 +-
 .../solrj/impl/LBHttpSolrClientBadInputTest.java   |    7 +-
 .../client/solrj/io/graph/GraphExpressionTest.java |    4 +-
 .../client/solrj/io/stream/JDBCStreamTest.java     |   95 +-
 .../solrj/io/stream/SelectWithEvaluatorsTest.java  |    7 -
 .../solr/client/solrj/io/stream/StreamingTest.java |    9 +
 .../solr/client/solrj/request/SchemaTest.java      |   22 +-
 .../solr/client/solrj/request/TestCoreAdmin.java   |   25 +-
 .../solr/client/solrj/request/TestV2Request.java   |    2 +-
 .../solrj/response/NoOpResponseParserTest.java     |   10 +-
 .../solrj/response/TestSpellCheckResponse.java     |    2 +-
 .../solrj/response/TestSuggesterResponse.java      |    2 +-
 .../solr/common/cloud/TestZkConfigManager.java     |    4 +-
 solr/solrj/src/test/org/noggit/TestJSONParser.java |   41 +-
 .../apache/solr/BaseDistributedSearchTestCase.java |   37 +-
 .../org/apache/solr/SolrIgnoredThreadsFilter.java  |   21 +-
 .../java/org/apache/solr/SolrJettyTestBase.java    |   59 +-
 .../src/java/org/apache/solr/SolrTestCase.java     |  246 ++-
 .../src/java/org/apache/solr/SolrTestCaseHS.java   |    7 +-
 .../src/java/org/apache/solr/SolrTestCaseJ4.java   |   41 +-
 .../solr/cloud/AbstractFullDistribZkTestBase.java  |   22 +-
 .../org/apache/solr/cloud/AbstractZkTestCase.java  |   37 +-
 .../java/org/apache/solr/cloud/ChaosMonkey.java    |    8 +-
 .../apache/solr/cloud/MiniSolrCloudCluster.java    |   50 +-
 .../org/apache/solr/cloud/MockZkStateReader.java   |    8 +-
 .../apache/solr/cloud/MultiSolrCloudTestCase.java  |   45 +-
 .../apache/solr/cloud/SolrCloudAuthTestCase.java   |   59 +-
 .../org/apache/solr/cloud/SolrCloudTestCase.java   |   31 +-
 .../java/org/apache/solr/cloud/ZkTestServer.java   |   85 +-
 .../java/org/apache/solr/util/BaseTestHarness.java |   65 +-
 .../java/org/apache/solr/util/DOMUtilTestBase.java |   10 +-
 .../java/org/apache/solr/util/RestTestBase.java    |  116 +-
 .../java/org/apache/solr/util/RestTestHarness.java |   15 +-
 .../src/java/org/apache/solr/util/TestHarness.java |   86 +-
 .../apache/zookeeper/server/ZooKeeperServer.java   | 2081 ++++++++++++++++++++
 .../src/resources/logconf/log4j2-fast.xml          |    2 +-
 .../src/resources/logconf/log4j2-startup-debug.xml |   11 +-
 .../solr/cloud/MiniSolrCloudClusterTest.java       |   15 +-
 versions.lock                                      |   11 +-
 versions.props                                     |    3 +-
 570 files changed, 12193 insertions(+), 9171 deletions(-)

diff --git a/gradle/testing/defaults-tests.gradle b/gradle/testing/defaults-tests.gradle
index a277d32..351863a 100644
--- a/gradle/testing/defaults-tests.gradle
+++ b/gradle/testing/defaults-tests.gradle
@@ -99,13 +99,15 @@ allprojects {
         maxParallelForks = 1
       }
 
+
+
       workingDir testsCwd
       useJUnit()
 
       minHeapSize = propertyOrDefault("tests.minheapsize", "512m")
       maxHeapSize = propertyOrDefault("tests.heapsize", "512m")
 
-      jvmArgs Commandline.translateCommandline(propertyOrDefault("tests.jvmargs", "-XX:TieredStopAtLevel=1 -XX:+UseParallelGC -XX:-UseBiasedLocking -DconfigurationFile=log4j2.xml -Dorg.apache.xml.dtm.DTMManager=org.apache.xml.dtm.ref.DTMManagerDefault"));
+      jvmArgs Commandline.translateCommandline(propertyOrDefault("tests.jvmargs", "-XX:TieredStopAtLevel=1 -XX:+UseParallelGC -XX:-UseBiasedLocking -Djava.net.preferIPv4Stack=true -DconfigurationFile=log4j2.xml -Dorg.apache.xml.dtm.DTMManager=org.apache.xml.dtm.ref.DTMManagerDefault"));
 
       ignoreFailures = resolvedTestOption("tests.haltonfailure").toBoolean() == false
 
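Note on this hunk: the functional change is the added -Djava.net.preferIPv4Stack=true, which tells the JVM to prefer the IPv4 stack and is a common guard against dual-stack flakiness in networked tests. The jvmArgs line relies on Ant's Commandline.translateCommandline to split the single tests.jvmargs property into individual JVM arguments; a minimal sketch of that call (the class name and raw string below are illustrative, not from this build):

    import org.apache.tools.ant.types.Commandline;

    public class JvmArgsSplitDemo {
      public static void main(String[] args) {
        // translateCommandline splits on whitespace while honoring quoting,
        // so one property string can carry several JVM flags.
        String raw = "-XX:TieredStopAtLevel=1 -Djava.net.preferIPv4Stack=true";
        for (String arg : Commandline.translateCommandline(raw)) {
          System.out.println(arg);
        }
      }
    }
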
diff --git a/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java b/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
index c61e4a3..b1629d8 100644
--- a/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
@@ -140,7 +140,7 @@ public final class NativeFSLockFactory extends FSLockFactory {
     }
   }
   
-  private static final void clearLockHeld(Path path) throws IOException {
+  public static final void clearLockHeld(Path path) throws IOException {
     boolean remove = LOCK_HELD.remove(path.toString());
     if (remove == false) {
       throw new AlreadyClosedException("Lock path was cleared but never marked as held: " + path);
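
Note on this hunk: clearLockHeld is widened from private to public, so code outside NativeFSLockFactory (test teardown, most plausibly, on this branch) can drop a stale entry from the static LOCK_HELD set. A hedged caller sketch using only the signature visible in the hunk; the helper class is illustrative:

    import java.io.IOException;
    import java.nio.file.Path;
    import org.apache.lucene.store.NativeFSLockFactory;

    final class LockCleanup {
      // Clears the JVM-wide "lock held" marker for a lock file that was
      // force-closed; throws AlreadyClosedException if no entry existed.
      static void releaseStaleLock(Path lockPath) throws IOException {
        NativeFSLockFactory.clearLockHeld(lockPath);
      }
    }
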
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index 2c7c04a..ad3a4ee 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -588,10 +588,10 @@ public abstract class LuceneTestCase extends Assert {
   }
 
   /**
-   * Max 10mb of static data stored in a test suite class after the suite is complete.
+   * Max static data stored in a test suite class after the suite is complete.
    * Prevents static data structures leaking and causing OOMs in subsequent tests.
    */
-  private final static long STATIC_LEAK_THRESHOLD = 5 * 1024;
+  private final static long STATIC_LEAK_THRESHOLD = 600;
 
   /** By-name list of ignored types like loggers etc. */
   private final static Set<String> STATIC_LEAK_IGNORED_TYPES = Set.of(
@@ -618,7 +618,7 @@ public abstract class LuceneTestCase extends Assert {
       //.around(new TestRuleLimitSysouts(suiteFailureMarker))
       .around(tempFilesCleanupRule = new TestRuleTemporaryFilesCleanup(suiteFailureMarker));
     // TODO LUCENE-7595: Java 9 does not allow to look into runtime classes, so we have to fix the RAM usage checker!
-    if (!Constants.JRE_IS_MINIMUM_JAVA9) {
+    //if (!Constants.JRE_IS_MINIMUM_JAVA9) {
       r = r.around(new StaticFieldsInvariantRule(STATIC_LEAK_THRESHOLD, true) {
         @Override
         protected boolean accept(java.lang.reflect.Field field) {
@@ -633,7 +633,7 @@ public abstract class LuceneTestCase extends Assert {
           return super.accept(field);
         }
       });
-    }
+    //}
     classRules = r.around(new NoClassHooksShadowingRule())
       .around(new NoInstanceHooksOverridesRule() {
         @Override
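
Note on this hunk: two things change in LuceneTestCase. STATIC_LEAK_THRESHOLD drops from 5 * 1024 bytes to 600 bytes (the old "Max 10mb" javadoc was already stale relative to the 5 KB constant), and the Java 9 guard is commented out so StaticFieldsInvariantRule now runs on modern JREs as well. In practice this forces suites to release static fixtures when they finish, which is the pattern the test changes later in this commit follow. A representative sketch (class and field names illustrative):

    import org.junit.AfterClass;

    public class SomeSuiteTest {
      private static byte[] bigFixture = new byte[1024 * 1024];

      @AfterClass
      public static void releaseStatics() {
        // StaticFieldsInvariantRule measures the static state a suite leaves
        // behind; nulling fixtures keeps it under the 600-byte threshold.
        bigFixture = null;
      }
    }
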
diff --git a/solr/cloud-dev/cloud.sh b/solr/cloud-dev/cloud.sh
index 76f2491..fd731e0 100755
--- a/solr/cloud-dev/cloud.sh
+++ b/solr/cloud-dev/cloud.sh
@@ -330,7 +330,7 @@ start(){
   echo "Final NUM_NODES is $NUM_NODES"
   for i in `seq 1 $NUM_NODES`; do
     mkdir -p "${CLUSTER_WD}/n${i}"
-    argsArray=(-c -s $CLUSTER_WD_FULL/n${i} -z localhost:${ZK_PORT}/solr_${SAFE_DEST} -p 898${i} -m $MEMORY \
+    argsArray=(-c -s $CLUSTER_WD_FULL/n${i} -z localhost:${ZK_PORT}/solr_${SAFE_DEST} -f -p 898${i} -m $MEMORY \
     -a "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=500${i} \
     -Dsolr.solrxml.location=zookeeper -Dsolr.log.dir=$CLUSTER_WD_FULL/n${i} $JVM_ARGS")
     FINAL_COMMAND="${SOLR}/bin/solr ${argsArray[@]}"
@@ -416,4 +416,4 @@ case ${COMMAND} in
     start
   ;;
   *) echo "Invalid command $COMMAND"; exit 2;
-esac
\ No newline at end of file
+esac
diff --git a/solr/contrib/analytics/src/java/org/apache/solr/analytics/stream/AnalyticsShardRequestManager.java b/solr/contrib/analytics/src/java/org/apache/solr/analytics/stream/AnalyticsShardRequestManager.java
index 2ab382f..02b5024 100644
--- a/solr/contrib/analytics/src/java/org/apache/solr/analytics/stream/AnalyticsShardRequestManager.java
+++ b/solr/contrib/analytics/src/java/org/apache/solr/analytics/stream/AnalyticsShardRequestManager.java
@@ -104,7 +104,7 @@ public class AnalyticsShardRequestManager {
 
       ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
       ClusterState clusterState = zkStateReader.getClusterState();
-      Set<String> liveNodes = clusterState.getLiveNodes();
+      Set<String> liveNodes = zkStateReader.getLiveNodes();
 
       Collection<Slice> slices = clusterState.getCollection(collection).getActiveSlices();
 
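Note on this hunk: the live-node set is now read from the ZkStateReader itself rather than from the ClusterState snapshot it handed back. Assuming ZkStateReader.getLiveNodes() on this branch returns the reader's current view (the method is used, not defined, in this diff), the difference is between a point-in-time copy and the live value:

    import java.util.Set;
    import org.apache.solr.common.cloud.ZkStateReader;

    final class LiveNodesSketch {
      // Snapshot path: fixed at the moment getClusterState() was called.
      static Set<String> snapshot(ZkStateReader reader) {
        return reader.getClusterState().getLiveNodes();
      }
      // Direct path (this branch): the reader's current live-nodes view.
      static Set<String> current(ZkStateReader reader) {
        return reader.getLiveNodes();
      }
    }
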
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/ExpressionFactoryTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/ExpressionFactoryTest.java
index 3e0d022..962c599 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/ExpressionFactoryTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/ExpressionFactoryTest.java
@@ -24,7 +24,9 @@ import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.analytics.function.ReductionCollectionManager;
 import org.apache.solr.analytics.value.constant.ConstantValue;
 import org.apache.solr.schema.IndexSchema;
+import org.junit.After;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -32,8 +34,8 @@ public class ExpressionFactoryTest extends SolrTestCaseJ4 {
 
   private static IndexSchema indexSchema;
 
-  @BeforeClass
-  public static void createSchemaAndFields() throws Exception {
+  @Before
+  public void createSchemaAndFields() throws Exception {
     initCore("solrconfig-analytics.xml","schema-analytics.xml");
     assertU(adoc("id", "1",
         "int_i", "1",
@@ -56,8 +58,9 @@ public class ExpressionFactoryTest extends SolrTestCaseJ4 {
     indexSchema = h.getCore().getLatestSchema();
   }
 
-  @AfterClass
-  public static void cleanUp() throws Exception {
+  @After
+  public void cleanUp() throws Exception {
+    deleteCore();
     indexSchema = null;
   }
 
@@ -68,6 +71,7 @@ public class ExpressionFactoryTest extends SolrTestCaseJ4 {
   }
 
   @Test
+  @Nightly
   public void userDefinedVariableFunctionTest() {
     ExpressionFactory fact = getExpressionFactory();
 
@@ -171,6 +175,7 @@ public class ExpressionFactoryTest extends SolrTestCaseJ4 {
   }
 
   @Test
+  @Nightly
   public void reductionManagerCreationTest() {
     ExpressionFactory fact = getExpressionFactory();
 
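Note on this hunk: the fixture lifecycle moves from @BeforeClass/@AfterClass, which run once per suite on static state, to @Before/@After, which run around every test method on the instance, so each test now gets a freshly initialized core and deleteCore() tears it down; the heavier tests are additionally gated behind @Nightly. A minimal JUnit 4 sketch of the lifecycle difference (names illustrative):

    import org.junit.After;
    import org.junit.Before;
    import org.junit.Test;

    public class LifecycleSketch {
      @Before
      public void setUp() {
        // Runs before every @Test method: each test sees fresh state.
      }

      @After
      public void tearDown() {
        // Runs after every test: release per-test resources here.
      }

      @Test
      public void someTest() {
        // Body elided; the point is the per-method setup/teardown.
      }
    }
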
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/function/field/AbstractAnalyticsFieldTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/function/field/AbstractAnalyticsFieldTest.java
index 3c9ee6e..c4479ea 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/function/field/AbstractAnalyticsFieldTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/function/field/AbstractAnalyticsFieldTest.java
@@ -231,6 +231,24 @@ public class AbstractAnalyticsFieldTest extends SolrTestCaseJ4 {
     indexSchema = null;
     searcher = null;
     ref = null;
+
+    singleInts = null;
+    multiInts = null;
+    singleLongs = null;
+    multiLongs = null;
+    singleFloats = null;
+    multiFloats = null;
+    singleDoubles = null;
+    multiDoubles = null;
+    singleDates = null;
+    multiDates = null;
+    singleStrings = null;
+    multiStrings = null;
+    singleBooleans = null;
+    multiBooleans = null;
+
+    missingDocuments = null;
+
   }
 
   protected <T> void checkSingleFieldValues(Map<String,T> expected, Map<String,T> found, Set<String> missing) {
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyAbstractAnalyticsTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyAbstractAnalyticsTest.java
index 3d65828..f7454f8 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyAbstractAnalyticsTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyAbstractAnalyticsTest.java
@@ -31,7 +31,6 @@ import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.lucene.util.IOUtils;
@@ -39,7 +38,6 @@ import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.analytics.util.AnalyticsResponseHeadings;
 import org.apache.solr.analytics.util.MedianCalculator;
 import org.apache.solr.analytics.util.OrdinalCalculator;
-import org.apache.solr.core.XmlConfigFile;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.rest.schema.FieldTypeXmlAdapter;
 import org.junit.AfterClass;
@@ -77,18 +75,15 @@ public class LegacyAbstractAnalyticsTest extends SolrTestCaseJ4 {
   }
 
   static private Document doc;
-  static private XPathFactory xPathFact;
 
   static private String rawResponse;
 
   @BeforeClass
   public static void beforeClassAbstractAnalysis() {
-    xPathFact = XmlConfigFile.xpathFactory;
   }
 
   @AfterClass
   public static void afterClassAbstractAnalysis() {
-    xPathFact = null;
     doc = null;
     rawResponse = null;
     defaults.clear();
@@ -112,7 +107,7 @@ public class LegacyAbstractAnalyticsTest extends SolrTestCaseJ4 {
     // This is a little fragile in that it demands the elements have the same name as type, i.e. when looking for a
     // VAL_TYPE.DOUBLE, the element in question is <double name="blah">47.0</double>.
     sb.append("/").append(type.toString()).append("[@name='").append(name).append("']");
-    String val = xPathFact.newXPath().compile(sb.toString()).evaluate(doc, XPathConstants.STRING).toString();
+    String val = h.getCore().getResourceLoader().getXPath().compile(sb.toString()).evaluate(doc, XPathConstants.STRING).toString();
     try {
       switch (type) {
         case INTEGER: return Integer.parseInt(val);
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyNoFacetTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyNoFacetTest.java
index da737c8..c2ded0d 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyNoFacetTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/LegacyNoFacetTest.java
@@ -16,9 +16,11 @@
  */
 package org.apache.solr.analytics.legacy;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -135,6 +137,22 @@ public class LegacyNoFacetTest extends LegacyAbstractAnalyticsTest {
     setResponse(h.query(request(fileToStringArr(LegacyNoFacetTest.class, fileName))));
   }
 
+  @AfterClass
+  public static void afterLegacyNoFacetTest() throws IOException {
+    intTestStart = null;
+    intMissing = 0;
+    longTestStart = null;
+    longMissing = 0;
+    floatTestStart = null;
+    floatMissing = 0;
+    doubleTestStart = null;
+    doubleMissing = 0;
+    dateTestStart = null;
+    dateMissing = 0;
+    stringTestStart = null;
+    stringMissing = 0;
+  }
+
   @Test
   public void sumTest() throws Exception {
     //Int
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyAbstractAnalyticsFacetTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyAbstractAnalyticsFacetTest.java
index a4e27b7..086fe09 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyAbstractAnalyticsFacetTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyAbstractAnalyticsFacetTest.java
@@ -22,9 +22,9 @@ import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.analytics.util.AnalyticsResponseHeadings;
 import org.apache.solr.analytics.util.MedianCalculator;
 import org.apache.solr.analytics.util.OrdinalCalculator;
-import org.apache.solr.core.XmlConfigFile;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.rest.schema.FieldTypeXmlAdapter;
+import org.apache.solr.util.TestHarness;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.w3c.dom.Document;
@@ -58,17 +58,17 @@ public class LegacyAbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
   protected String latestType = "";
 
   private static Document doc;
-  private static XPathFactory xPathFact;
+
   private static String rawResponse;
 
   @BeforeClass
   public static void beforeClassAbstractAnalysis() {
-    xPathFact = XmlConfigFile.xpathFactory;
+
   }
 
   @AfterClass
   public static void afterClassAbstractAnalysis() {
-    xPathFact = null;
+
     doc = null;
     rawResponse = null;
     defaults.clear();
@@ -85,7 +85,7 @@ public class LegacyAbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
   }
 
   protected Node getNode(String xPath) throws XPathExpressionException {
-    return (Node) XmlConfigFile.getXpath().compile(xPath).evaluate(doc, XPathConstants.NODE);
+    return (Node) h.getXpath().compile(xPath).evaluate(doc, XPathConstants.NODE);
   }
   private NodeList getNodes(String n1, String n2, String n3, String element, String n4) throws XPathExpressionException {
     // Construct the XPath expression. The form better not change or all these will fail.
@@ -94,7 +94,7 @@ public class LegacyAbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
     sb.append("/lst[@name='").append(n3).append("']");
     sb.append("/lst[@name!='(MISSING)']");
     sb.append("//").append(element).append("[@name='").append(n4).append("']");
-    return (NodeList) XmlConfigFile.getXpath().compile(sb.toString()).evaluate(doc, XPathConstants.NODESET);
+    return (NodeList) h.getXpath().compile(sb.toString()).evaluate(doc, XPathConstants.NODESET);
 
   }
   protected ArrayList<String> getStringList(String n1, String n2, String n3, String element, String n4)
@@ -333,7 +333,7 @@ public class LegacyAbstractAnalyticsFacetTest extends SolrTestCaseJ4 {
 
   protected NodeList getNodes(String xPath) throws XPathExpressionException {
     StringBuilder sb = new StringBuilder(xPath);
-    return (NodeList) XmlConfigFile.getXpath().compile(sb.toString()).evaluate(doc, XPathConstants.NODESET);
+    return (NodeList) h.getXpath().compile(sb.toString()).evaluate(doc, XPathConstants.NODESET);
   }
 
 }
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetExtrasTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetExtrasTest.java
index 92ab2cc..4fdc97f 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetExtrasTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetExtrasTest.java
@@ -22,6 +22,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -104,6 +105,14 @@ public class LegacyFieldFacetExtrasTest extends LegacyAbstractAnalyticsFacetTest
     setResponse(h.query(request(fileToStringArr(LegacyFieldFacetExtrasTest.class, fileName))));
   }
 
+  @AfterClass
+  public static void afterLegacyFieldFacetExtrasTest() {
+    intLongTestStart = null;
+    intFloatTestStart = null;
+    intDoubleTestStart = null;
+    intStringTestStart = null;
+  }
+
   @Test
   public void limitTest() throws Exception {
 
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetTest.java
index 843c605..f99e690 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyFieldFacetTest.java
@@ -24,6 +24,7 @@ import java.util.List;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
+import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -420,6 +421,53 @@ public class LegacyFieldFacetTest extends LegacyAbstractAnalyticsFacetTest{
     setResponse(h.query(request(reqFacetParamas)));
   }
 
+  @AfterClass
+  public static void afterLegacyFieldFacetTest() {
+    //INT
+    intDateTestStart = null;
+    intDateTestMissing = null;
+    intStringTestStart = null;
+    intStringTestMissing = null;
+
+    //LONG
+    longDateTestStart = null;
+    longDateTestMissing = null;
+    longStringTestStart = null;
+    longStringTestMissing = null;
+
+    //FLOAT
+    floatDateTestStart = null;
+    floatDateTestMissing = null;
+    floatStringTestStart = null;
+    floatStringTestMissing = null;
+
+    //DOUBLE
+    doubleDateTestStart = null;
+    doubleDateTestMissing = null;
+    doubleStringTestStart = null;
+    doubleStringTestMissing = null;
+
+    //DATE
+    dateIntTestStart = null;
+    dateIntTestMissing = null;
+    dateLongTestStart = null;
+    dateLongTestMissing = null;
+
+    //String
+    stringIntTestStart = null;
+    stringIntTestMissing = null;
+    stringLongTestStart = null;
+    stringLongTestMissing = null;
+
+    //Multi-Valued
+    multiLongTestStart = null;
+    multiLongTestMissing = null;
+    multiStringTestStart = null;
+    multiStringTestMissing = null;
+    multiDateTestStart = null;
+    multiDateTestMissing = null;
+  }
+
   @Test
   public void timeAllowedTest() throws Exception {
     String query = "int_id: [0 TO " + random().nextInt(INT) + "] AND long_ld: [0 TO " + random().nextInt(LONG) + "]";
diff --git a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyRangeFacetTest.java b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyRangeFacetTest.java
index 7011ce6..0676874 100644
--- a/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyRangeFacetTest.java
+++ b/solr/contrib/analytics/src/test/org/apache/solr/analytics/legacy/facet/LegacyRangeFacetTest.java
@@ -19,6 +19,7 @@ package org.apache.solr.analytics.legacy.facet;
 
 import java.util.ArrayList;
 
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -115,6 +116,19 @@ public class LegacyRangeFacetTest extends LegacyAbstractAnalyticsFacetTest {
     setResponse(h.query(request(fileToStringArr(LegacyRangeFacetTest.class, fileName))));
   }
 
+  @AfterClass
+  public static void afterClass() throws Exception {
+    //INT
+    intLongTestStart = null;
+    intDoubleTestStart = null;
+    intDateTestStart = null;
+
+    //FLOAT
+    floatLongTestStart = null;
+    floatDoubleTestStart = null;
+    floatDateTestStart = null;
+  }
+
   @SuppressWarnings("unchecked")
   @Test
   public void rangeTest() throws Exception {
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestHierarchicalDocBuilder.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestHierarchicalDocBuilder.java
index 07b627a..9a230be 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestHierarchicalDocBuilder.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestHierarchicalDocBuilder.java
@@ -167,7 +167,7 @@ public class TestHierarchicalDocBuilder extends AbstractDataImportHandlerTestCas
     String xpath = "//arr[@name='documents']/lst[arr[@name='id']/str='"+parentId1+"']/"+
       "arr[@name='_childDocuments_']/lst[arr[@name='id']/str='"+childId+"']/"+
       "arr[@name='_childDocuments_']/lst[arr[@name='id']/str='"+grandChildrenIds.get(0)+"']";
-    String results = TestHarness.validateXPath(resp, xpath);
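+    // in this branch validateXPath requires an explicit resource loader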
+    String results = TestHarness.validateXPath(h.getCore().getResourceLoader(), resp, xpath);
     assertTrue("Debug documents does not contain child documents\n"+resp+"\n"+ xpath+
                                                         "\n"+results, results == null);
     
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSourceConvertType.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSourceConvertType.java
index ef1cc7b..e4576c8 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSourceConvertType.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSourceConvertType.java
@@ -20,6 +20,8 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.BeforeClass;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -30,11 +32,15 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.Properties;
 
-@ThreadLeakAction({ThreadLeakAction.Action.WARN})
-@ThreadLeakLingering(linger = 0)
-@ThreadLeakZombies(ThreadLeakZombies.Consequence.CONTINUE)
-@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
+@LuceneTestCase.AwaitsFix(bugUrl = "TODO: figure out how the Derby BasicDaemon should be stopped; it does have the ability to be stopped")
 public class TestJdbcDataSourceConvertType extends AbstractDataImportHandlerTestCase {
+
+  @BeforeClass
+  public static void beforeTestJdbcDataSourceConvertType() {
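+    // ask the framework to interrupt Derby's lingering rawStoreDaemon thread on teardown (the boolean flag's semantics are branch-specific)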
+    interruptThreadsOnTearDown(false, "derby.rawStoreDaemon");
+  }
+
   public void testConvertType() throws Throwable {
     final Locale loc = Locale.getDefault();
     assumeFalse("Derby is not happy with locale sr-Latn-*",
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestScriptTransformer.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestScriptTransformer.java
index cf60836..44418ea 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestScriptTransformer.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestScriptTransformer.java
@@ -157,7 +157,7 @@ public class TestScriptTransformer extends AbstractDataImportHandlerTestCase {
     }
   }
 
-  static String xml = "<dataConfig>\n"
+  String xml = "<dataConfig>\n"
           + "<script><![CDATA[\n"
           + "function checkNextToken(row)\t{\n"
           + " var nt = row.get('nextToken');"
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
index 2ac1c2d..c0be8fd 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
@@ -22,13 +22,13 @@ import java.io.StringWriter;
 import java.lang.invoke.MethodHandles;
 import java.util.Locale;
 
-import org.apache.commons.io.IOUtils;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.UpdateParams;
 import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.loader.ContentStreamLoader;
 import org.apache.solr.request.SolrQueryRequest;
@@ -253,7 +253,7 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
       } catch (SAXException e) {
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
       } finally {
-        IOUtils.closeQuietly(inputStream);
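+        // drain the stream to completion instead of closing it quietly; closing is presumably left to the caller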
+        Utils.readFully(inputStream);
       }
     } else {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stream type of " + streamType + " didn't match any known parsers.  Please supply the " + ExtractingParams.STREAM_TYPE + " parameter.");
diff --git a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ParseContextConfigTest.java b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ParseContextConfigTest.java
index 8aeeaad..fca5f07 100644
--- a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ParseContextConfigTest.java
+++ b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ParseContextConfigTest.java
@@ -44,11 +44,13 @@ public class ParseContextConfigTest extends SolrTestCaseJ4 {
     entry.appendChild(property);
     entries.appendChild(entry);
 
-    ParseContext parseContext = new ParseContextConfig(new SolrResourceLoader(Paths.get(".")), entries).create();
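+    // SolrResourceLoader is Closeable; try-with-resources keeps the test from leaking it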
+    try (SolrResourceLoader loader = new SolrResourceLoader(Paths.get("."))) {
+      ParseContext parseContext = new ParseContextConfig(loader, entries).create();
 
-    PDFParserConfig pdfParserConfig = parseContext.get(PDFParserConfig.class);
+      PDFParserConfig pdfParserConfig = parseContext.get(PDFParserConfig.class);
 
-    assertEquals(true, pdfParserConfig.getExtractInlineImages());
+      assertEquals(true, pdfParserConfig.getExtractInlineImages());
+    }
   }
 
 }
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index cb3e624..b6fb1b4 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -210,7 +210,9 @@ public class TestLTROnSolrCloud extends TestRerankBase {
     for (JettySolrRunner solrRunner : solrCluster.getJettySolrRunners()) {
       if (!solrRunner.getCoreContainer().getCores().isEmpty()){
         String coreName = solrRunner.getCoreContainer().getCores().iterator().next().getName();
-        restTestHarness = new RestTestHarness(() -> solrRunner.getBaseUrl().toString() + "/" + coreName, solrCluster.getSolrClient().getHttpClient());
+        restTestHarness = new RestTestHarness(() -> solrRunner.getBaseUrl().toString() + "/" + coreName,
+            solrCluster.getSolrClient().getHttpClient(),
+            solrCluster.getJettySolrRunners().get(0).getCoreContainer().getResourceLoader());
         break;
       }
     }
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
index decb1c0..df7d9a5 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
@@ -17,27 +17,31 @@
 package org.apache.solr.ltr;
 
 import org.apache.solr.client.solrj.SolrQuery;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
+
 import org.junit.Test;
 
 public class TestLTRQParserPlugin extends TestRerankBase {
 
 
-  @BeforeClass
-  public static void before() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     setuptest(true);
 
     loadFeatures("features-linear.json");
     loadModels("linear-model.json");
+    super.setUp();
   }
 
-  @AfterClass
-  public static void after() throws Exception {
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
     aftertest();
   }
 
   @Test
+  @Nightly
   public void ltrModelIdMissingTest() throws Exception {
     final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
     final SolrQuery query = new SolrQuery();
@@ -52,6 +56,7 @@ public class TestLTRQParserPlugin extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void ltrModelIdDoesNotExistTest() throws Exception {
     final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
     final SolrQuery query = new SolrQuery();
@@ -66,6 +71,7 @@ public class TestLTRQParserPlugin extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void ltrBadRerankDocsTest() throws Exception {
     final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
     final SolrQuery query = new SolrQuery();
@@ -80,6 +86,7 @@ public class TestLTRQParserPlugin extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void ltrMoreResultsThanReRankedTest() throws Exception {
     final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
     final SolrQuery query = new SolrQuery();
@@ -113,6 +120,7 @@ public class TestLTRQParserPlugin extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void ltrNoResultsTest() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("title:bloomberg23");
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index e921bcb..039a881 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -57,8 +57,6 @@ public class TestLTRReRankingPipeline extends SolrTestCase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader();
-
   private IndexSearcher getSearcher(IndexReader r) {
     // 'yes' to maybe wrapping in general
     final boolean maybeWrap = true;
@@ -71,18 +69,18 @@ public class TestLTRReRankingPipeline extends SolrTestCase {
   }
 
   private static List<Feature> makeFieldValueFeatures(int[] featureIds,
-      String field) {
+      String field) throws IOException {
     final List<Feature> features = new ArrayList<>();
-    for (final int i : featureIds) {
-      final Map<String,Object> params = new HashMap<String,Object>();
-      params.put("field", field);
-      final Feature f = Feature.getInstance(solrResourceLoader,
-          FieldValueFeature.class.getName(),
-          "f" + i, params);
-      f.setIndex(i);
-      features.add(f);
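+    // a short-lived loader is enough for Feature.getInstance; try-with-resources closes it promptly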
+    try (SolrResourceLoader solrResourceLoader = new SolrResourceLoader()) {
+      for (final int i : featureIds) {
+        final Map<String,Object> params = new HashMap<String,Object>();
+        params.put("field", field);
+        final Feature f = Feature.getInstance(solrResourceLoader, FieldValueFeature.class.getName(), "f" + i, params);
+        f.setIndex(i);
+        features.add(f);
+      }
+      return features;
     }
-    return features;
   }
 
   private static class MockModel extends LTRScoringModel {
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java
index 973436f..b93a21a 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java
@@ -55,39 +55,38 @@ import org.junit.Test;
 
 public class TestLTRScoringQuery extends SolrTestCase {
 
-  public final static SolrResourceLoader solrResourceLoader = new SolrResourceLoader();
-
   private IndexSearcher getSearcher(IndexReader r) {
     final IndexSearcher searcher = newSearcher(r, false, false);
     return searcher;
   }
 
-  private static List<Feature> makeFeatures(int[] featureIds) {
-    final List<Feature> features = new ArrayList<>();
-    for (final int i : featureIds) {
-      Map<String,Object> params = new HashMap<String,Object>();
-      params.put("value", i);
-      final Feature f = Feature.getInstance(solrResourceLoader,
-          ValueFeature.class.getName(),
-          "f" + i, params);
-      f.setIndex(i);
-      features.add(f);
+  private static List<Feature> makeFeatures(int[] featureIds) throws IOException {
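+    // as in TestLTRReRankingPipeline, build the features against a short-lived, auto-closed loader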
+    try (SolrResourceLoader solrResourceLoader = new SolrResourceLoader()) {
+      final List<Feature> features = new ArrayList<>();
+      for (final int i : featureIds) {
+        Map<String,Object> params = new HashMap<String,Object>();
+        params.put("value", i);
+        final Feature f = Feature.getInstance(solrResourceLoader, ValueFeature.class.getName(), "f" + i, params);
+        f.setIndex(i);
+        features.add(f);
+      }
+
+      return features;
     }
-    return features;
   }
 
-  private static List<Feature> makeFilterFeatures(int[] featureIds) {
-    final List<Feature> features = new ArrayList<>();
-    for (final int i : featureIds) {
-      Map<String,Object> params = new HashMap<String,Object>();
-      params.put("value", i);
-      final Feature f = Feature.getInstance(solrResourceLoader,
-          ValueFeature.class.getName(),
-          "f" + i, params);
-      f.setIndex(i);
-      features.add(f);
+  private static List<Feature> makeFilterFeatures(int[] featureIds) throws IOException {
+    try (SolrResourceLoader solrResourceLoader = new SolrResourceLoader()) {
+      final List<Feature> features = new ArrayList<>();
+      for (final int i : featureIds) {
+        Map<String,Object> params = new HashMap<String,Object>();
+        params.put("value", i);
+        final Feature f = Feature.getInstance(solrResourceLoader, ValueFeature.class.getName(), "f" + i, params);
+        f.setIndex(i);
+        features.add(f);
+      }
+      return features;
     }
-    return features;
   }
 
   private LTRScoringQuery.ModelWeight performQuery(TopDocs hits,
@@ -116,7 +115,7 @@ public class TestLTRScoringQuery extends SolrTestCase {
   }
 
   @Test
-  public void testLTRScoringQueryEquality() throws ModelException {
+  public void testLTRScoringQueryEquality() throws ModelException, IOException {
     final List<Feature> features = makeFeatures(new int[] {0, 1, 2});
     final List<Normalizer> norms =
         new ArrayList<Normalizer>(
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java
index dbbaff1..7b6db02 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java
@@ -38,7 +38,6 @@ import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.ltr.feature.Feature;
 import org.apache.solr.ltr.feature.FeatureException;
 import org.apache.solr.ltr.feature.ValueFeature;
@@ -59,8 +58,6 @@ public class TestRerankBase extends RestTestBase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  protected static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader();
-
   protected static File tmpSolrHome;
   protected static File tmpConfDir;
 
@@ -76,7 +73,6 @@ public class TestRerankBase extends RestTestBase {
 
   final private static String SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT = "solr.ltr.transformer.fv.defaultFormat";
   private static String defaultFeatureFormat;
-  protected static JettySolrRunner jetty;
 
   protected String chooseDefaultFeatureVector(String dense, String sparse) {
     if (defaultFeatureFormat == null) {
@@ -116,25 +112,25 @@ public class TestRerankBase extends RestTestBase {
     System.clearProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT);
   }
 
-  protected static void setuptest(boolean bulkIndex) throws Exception {
+  protected void setuptest(boolean bulkIndex) throws Exception {
     chooseDefaultFeatureFormat();
     setuptest("solrconfig-ltr.xml", "schema.xml");
     if (bulkIndex) bulkIndex();
   }
 
-  protected static void setupPersistenttest(boolean bulkIndex) throws Exception {
+  protected void setupPersistenttest(boolean bulkIndex) throws Exception {
     chooseDefaultFeatureFormat();
     setupPersistentTest("solrconfig-ltr.xml", "schema.xml");
     if (bulkIndex) bulkIndex();
   }
 
-  public static ManagedFeatureStore getManagedFeatureStore() {
+  public ManagedFeatureStore getManagedFeatureStore() {
     try (SolrCore core = jetty.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
       return ManagedFeatureStore.getManagedFeatureStore(core);
     }
   }
 
-  public static ManagedModelStore getManagedModelStore() {
+  public ManagedModelStore getManagedModelStore() {
     try (SolrCore core = jetty.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
       return ManagedModelStore.getManagedModelStore(core);
     }
@@ -187,7 +183,7 @@ public class TestRerankBase extends RestTestBase {
     return extraServlets;
   }
 
-  public static void setuptest(String solrconfig, String schema)
+  public void setuptest(String solrconfig, String schema)
       throws Exception {
 
     SortedMap<ServletHolder,String> extraServlets =
@@ -198,7 +194,7 @@ public class TestRerankBase extends RestTestBase {
         "/solr", true, extraServlets);
   }
 
-  public static void setupPersistentTest(String solrconfig, String schema)
+  public void setupPersistentTest(String solrconfig, String schema)
       throws Exception {
 
     SortedMap<ServletHolder,String> extraServlets =
@@ -208,15 +204,7 @@ public class TestRerankBase extends RestTestBase {
         "/solr", true, extraServlets);
   }
 
-  protected static void aftertest() throws Exception {
-    if (null != restTestHarness) {
-      restTestHarness.close();
-      restTestHarness = null;
-    }
-    if (null != jetty) {
-      jetty.stop();
-      jetty = null;
-    }
+  protected void aftertest() throws Exception {
     if (null != tmpSolrHome) {
       FileUtils.deleteDirectory(tmpSolrHome);
       tmpSolrHome = null;
@@ -226,7 +214,7 @@ public class TestRerankBase extends RestTestBase {
     unchooseDefaultFeatureFormat();
   }
 
-  public static void makeRestTestHarnessNull() {
+  public void makeRestTestHarnessNull() {
     restTestHarness = null;
   }
 
@@ -274,7 +262,7 @@ public class TestRerankBase extends RestTestBase {
     return sb.toString();
   }
 
-  protected static void loadFeature(String name, String type, String params)
+  protected void loadFeature(String name, String type, String params)
       throws Exception {
     final String feature = getFeatureInJson(name, type, "test", params);
     log.info("loading feauture \n{} ", feature);
@@ -282,7 +270,7 @@ public class TestRerankBase extends RestTestBase {
         "/responseHeader/status==0");
   }
 
-  protected static void loadFeature(String name, String type, String fstore,
+  protected void loadFeature(String name, String type, String fstore,
       String params) throws Exception {
     final String feature = getFeatureInJson(name, type, fstore, params);
     log.info("loading feauture \n{} ", feature);
@@ -290,12 +278,12 @@ public class TestRerankBase extends RestTestBase {
         "/responseHeader/status==0");
   }
 
-  protected static void loadModel(String name, String type, String[] features,
+  protected void loadModel(String name, String type, String[] features,
       String params) throws Exception {
     loadModel(name, type, features, "test", params);
   }
 
-  protected static void loadModel(String name, String type, String[] features,
+  protected void loadModel(String name, String type, String[] features,
       String fstore, String params) throws Exception {
     final String model = getModelInJson(name, type, features, fstore, params);
     log.info("loading model \n{} ", model);
@@ -303,7 +291,7 @@ public class TestRerankBase extends RestTestBase {
         "/responseHeader/status==0");
   }
 
-  public static void loadModels(String fileName) throws Exception {
+  public void loadModels(String fileName) throws Exception {
     final URL url = TestRerankBase.class.getResource("/modelExamples/"
         + fileName);
     final String multipleModels = FileUtils.readFileToString(
@@ -313,13 +301,13 @@ public class TestRerankBase extends RestTestBase {
         "/responseHeader/status==0");
   }
 
-  public static LTRScoringModel createModelFromFiles(String modelFileName,
+  public LTRScoringModel createModelFromFiles(String modelFileName,
       String featureFileName) throws ModelException, Exception {
     return createModelFromFiles(modelFileName, featureFileName,
         FeatureStore.DEFAULT_FEATURE_STORE_NAME);
   }
 
-  public static LTRScoringModel createModelFromFiles(String modelFileName,
+  public LTRScoringModel createModelFromFiles(String modelFileName,
       String featureFileName, String featureStoreName) throws ModelException, Exception {
     URL url = TestRerankBase.class.getResource("/modelExamples/"
         + modelFileName);
@@ -349,7 +337,7 @@ public class TestRerankBase extends RestTestBase {
     ms.setManagedFeatureStore(fs); // can we skip this and just use fs directly below?
 
     final LTRScoringModel ltrScoringModel = ManagedModelStore.fromLTRScoringModelMap(
-        solrResourceLoader, mapFromJson(modelJson), ms.getManagedFeatureStore());
+        solrConfig.getResourceLoader(), mapFromJson(modelJson), ms.getManagedFeatureStore());
     ms.addModel(ltrScoringModel);
     return ltrScoringModel;
   }
@@ -365,7 +353,7 @@ public class TestRerankBase extends RestTestBase {
     return (Map<String,Object>) parsedJson;
   }
 
-  public static void loadFeatures(String fileName) throws Exception {
+  public void loadFeatures(String fileName) throws Exception {
     final URL url = TestRerankBase.class.getResource("/featureExamples/"
         + fileName);
     final String multipleFeatures = FileUtils.readFileToString(
@@ -383,7 +371,7 @@ public class TestRerankBase extends RestTestBase {
     for (final String name : names) {
       final Map<String,Object> params = new HashMap<String,Object>();
       params.put("value", 10);
-      final Feature f = Feature.getInstance(solrResourceLoader,
+      final Feature f = Feature.getInstance(solrConfig.getResourceLoader(),
           ValueFeature.class.getCanonicalName(),
           name, params);
       f.setIndex(pos);
@@ -397,7 +385,7 @@ public class TestRerankBase extends RestTestBase {
     return getFeatures(Arrays.asList(names));
   }
 
-  protected static void loadModelAndFeatures(String name, int allFeatureCount,
+  protected void loadModelAndFeatures(String name, int allFeatureCount,
       int modelFeatureCount) throws Exception {
     final String[] features = new String[modelFeatureCount];
     final String[] weights = new String[modelFeatureCount];
@@ -415,16 +403,16 @@ public class TestRerankBase extends RestTestBase {
         "{\"weights\":{" + String.join(",", weights) + "}}");
   }
 
-  protected static void bulkIndex() throws Exception {
-    assertU(adoc("title", "bloomberg different bla", "description",
+  protected void bulkIndex() throws Exception {
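+    // index through the REST harness instead of assertU, since these tests now run against a live Jetty core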
+    restTestHarness.update(adoc("title", "bloomberg different bla", "description",
         "bloomberg", "id", "6", "popularity", "1"));
-    assertU(adoc("title", "bloomberg bloomberg ", "description", "bloomberg",
+    restTestHarness.update(adoc("title", "bloomberg bloomberg ", "description", "bloomberg",
         "id", "7", "popularity", "2"));
-    assertU(adoc("title", "bloomberg bloomberg bloomberg", "description",
+    restTestHarness.update(adoc("title", "bloomberg bloomberg bloomberg", "description",
         "bloomberg", "id", "8", "popularity", "3"));
-    assertU(adoc("title", "bloomberg bloomberg bloomberg bloomberg",
+    restTestHarness.update(adoc("title", "bloomberg bloomberg bloomberg bloomberg",
         "description", "bloomberg", "id", "9", "popularity", "5"));
-    assertU(commit());
+    restTestHarness.update(commit());
   }
 
   protected static void bulkIndex(String filePath) throws Exception {
@@ -519,14 +507,14 @@ public class TestRerankBase extends RestTestBase {
     final String featureName = "randomFeatureName"+random().nextInt(10);
 
     // create a feature from the parameters
-    final Feature featureA = Feature.getInstance(solrResourceLoader,
+    final Feature featureA = Feature.getInstance(solrConfig.getResourceLoader(),
         featureClassName, featureName, paramsA);
 
     // turn the feature back into parameters
     final LinkedHashMap<String,Object> paramsB = featureA.paramsToMap();
 
     // create feature B from feature A's parameters
-    final Feature featureB = Feature.getInstance(solrResourceLoader,
+    final Feature featureB = Feature.getInstance(solrConfig.getResourceLoader(),
         featureClassName, featureName, paramsB);
 
     // check that feature A and feature B are identical
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
index 7d857cf..e6bea7a 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
@@ -47,24 +47,23 @@ import org.apache.solr.ltr.model.ModelException;
 import org.apache.solr.ltr.model.TestLinearModel;
 import org.apache.solr.ltr.norm.IdentityNormalizer;
 import org.apache.solr.ltr.norm.Normalizer;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Ignore;
 import org.junit.Test;
 
-@Ignore // nocommit flakey
 public class TestSelectiveWeightCreation extends TestRerankBase {
   private IndexSearcher getSearcher(IndexReader r) {
     final IndexSearcher searcher = newSearcher(r, false, false);
     return searcher;
   }
 
-  private static List<Feature> makeFeatures(int[] featureIds) {
+  private List<Feature> makeFeatures(int[] featureIds) {
     final List<Feature> features = new ArrayList<>();
     for (final int i : featureIds) {
       Map<String,Object> params = new HashMap<String,Object>();
       params.put("value", i);
-      final Feature f = Feature.getInstance(solrResourceLoader,
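+      // borrow the running core's resource loader rather than constructing (and leaking) a new one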
+      final Feature f = Feature.getInstance(jetty.getCoreContainer().getCores().iterator().next().getResourceLoader(),
           ValueFeature.class.getName(),
           "f" + i, params);
       f.setIndex(i);
@@ -95,28 +94,31 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
   }
 
 
-  @BeforeClass
-  public static void before() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     setuptest(false);
 
-    assertU(adoc("id", "1", "title", "w3 w1", "description", "w1", "popularity", "1"));
-    assertU(adoc("id", "2", "title", "w2",    "description", "w2", "popularity", "2"));
-    assertU(adoc("id", "3", "title", "w3",    "description", "w3", "popularity", "3"));
-    assertU(adoc("id", "4", "title", "w3 w3", "description", "w4", "popularity", "4"));
-    assertU(adoc("id", "5", "title", "w5",    "description", "w5", "popularity", "5"));
-    assertU(commit());
+    restTestHarness.update(adoc("id", "1", "title", "w3 w1", "description", "w1", "popularity", "1"));
+    restTestHarness.update(adoc("id", "2", "title", "w2",    "description", "w2", "popularity", "2"));
+    restTestHarness.update(adoc("id", "3", "title", "w3",    "description", "w3", "popularity", "3"));
+    restTestHarness.update(adoc("id", "4", "title", "w3 w3", "description", "w4", "popularity", "4"));
+    restTestHarness.update(adoc("id", "5", "title", "w5",    "description", "w5", "popularity", "5"));
+    restTestHarness.update(commit());
 
     loadFeatures("external_features.json");
     loadModels("external_model.json");
     loadModels("external_model2.json");
+    super.setUp();
   }
 
-  @AfterClass
-  public static void after() throws Exception {
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
     aftertest();
   }
 
   @Test
+  @Nightly
   public void testScoringQueryWeightCreation() throws IOException, ModelException {
     final Directory dir = newDirectory();
     final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@@ -194,13 +196,15 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
     }
     assertEquals(validFeatures, allFeatures.size());
 
-    assertU(delI("10"));assertU(delI("11"));
+    restTestHarness.update(delI("10"));
+    restTestHarness.update(delI("11"));
     r.close();
     dir.close();
   }
 
 
   @Test
+  @Nightly
   public void testSelectiveWeightsRequestFeaturesFromDifferentStore() throws Exception {
 
 //    final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
index 0c97f0f..f42cebd 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
@@ -19,38 +19,40 @@ package org.apache.solr.ltr.feature;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.ltr.FeatureLoggerTestUtils;
 import org.apache.solr.ltr.TestRerankBase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
 public class TestExternalFeatures extends TestRerankBase {
 
-  @BeforeClass
-  public static void before() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     setuptest(false);
 
-    assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
+    restTestHarness.update(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
         "1"));
-    assertU(adoc("id", "2", "title", "w2", "description", "w2", "popularity",
+    restTestHarness.update(adoc("id", "2", "title", "w2", "description", "w2", "popularity",
         "2"));
-    assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
+    restTestHarness.update(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
         "3"));
-    assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
+    restTestHarness.update(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
         "4"));
-    assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
+    restTestHarness.update(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
         "5"));
-    assertU(commit());
+    restTestHarness.update(commit());
 
     loadFeatures("external_features.json");
     loadModels("external_model.json");
+    super.setUp();
   }
 
-  @AfterClass
-  public static void after() throws Exception {
-    aftertest();
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
+    aftertest();
   }
 
   @Test
+  @Nightly
   public void testEfiInTransformerShouldNotChangeOrderOfRerankedResults() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
@@ -86,6 +88,7 @@ public class TestExternalFeatures extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void testFeaturesUseStopwordQueryReturnEmptyFeatureVector() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
@@ -107,6 +110,7 @@ public class TestExternalFeatures extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void testEfiFeatureExtraction() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
@@ -132,6 +136,7 @@ public class TestExternalFeatures extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void featureExtraction_valueFeatureImplicitlyNotRequired_shouldNotScoreFeature() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
@@ -152,6 +157,7 @@ public class TestExternalFeatures extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void featureExtraction_valueFeatureExplicitlyNotRequired_shouldNotScoreFeature() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
@@ -172,6 +178,7 @@ public class TestExternalFeatures extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void featureExtraction_valueFeatureRequired_shouldThrowException() throws Exception {
     final SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
@@ -184,6 +191,7 @@ public class TestExternalFeatures extends TestRerankBase {
   }
 
   @Test
+  @Nightly
   public void featureExtraction_valueFeatureRequiredInFq_shouldThrowException() throws Exception {
     final String userTitlePhrase1 = "userTitlePhrase1";
     final String userTitlePhrase2 = "userTitlePhrase2";
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java
index 64b0624..662b795 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java
@@ -16,39 +16,41 @@
  */
 package org.apache.solr.ltr.feature;
 
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.ltr.FeatureLoggerTestUtils;
 import org.apache.solr.ltr.TestRerankBase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
-@Ignore // nocommit flakey
+@LuceneTestCase.Nightly
 public class TestExternalValueFeatures extends TestRerankBase {
 
-  @BeforeClass
-  public static void before() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     setuptest(false);
 
-    assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
+    restTestHarness.update(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
         "1"));
-    assertU(adoc("id", "2", "title", "w2", "description", "w2", "popularity",
+    restTestHarness.update(adoc("id", "2", "title", "w2", "description", "w2", "popularity",
         "2"));
-    assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
+    restTestHarness.update(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
         "3"));
-    assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
+    restTestHarness.update(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
         "4"));
-    assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
+    restTestHarness.update(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
         "5"));
-    assertU(commit());
+    restTestHarness.update(commit());
 
     loadFeatures("external_features_for_sparse_processing.json");
     loadModels("multipleadditivetreesmodel_external_binary_features.json");
+    super.setUp();
   }
 
-  @AfterClass
-  public static void after() throws Exception {
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
     aftertest();
   }
 
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java
index a15178b..c0c6e7e 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java
@@ -19,14 +19,15 @@ package org.apache.solr.ltr.feature;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.ltr.TestRerankBase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
-
+@LuceneTestCase.Nightly
 public class TestFeatureExtractionFromMultipleSegments extends TestRerankBase {
   static final String AB = "abcdefghijklmnopqrstuvwxyz";
 
@@ -38,43 +39,45 @@ public class TestFeatureExtractionFromMultipleSegments extends TestRerankBase {
     return sb.toString();
  }
 
-  @BeforeClass
-  public static void before() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     // solrconfig-multiseg.xml contains the merge policy to restrict merging
     setuptest("solrconfig-multiseg.xml", "schema.xml");
     // index 400 documents
     for(int i = 0; i<400;i=i+20) {
-      assertU(adoc("id", Integer.toString(i),   "popularity", "201", "description", "apple is a company " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+1), "popularity", "201", "description", "d " + randomString(i%6+3), "normHits", "0.11"));
+      restTestHarness.update(adoc("id", Integer.toString(i),   "popularity", "201", "description", "apple is a company " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+1), "popularity", "201", "description", "d " + randomString(i%6+3), "normHits", "0.11"));
 
-      assertU(adoc("id", Integer.toString(i+2), "popularity", "201", "description", "apple is a company too " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+3), "popularity", "201", "description", "new york city is big apple " + randomString(i%6+3), "normHits", "0.11"));
+      restTestHarness.update(adoc("id", Integer.toString(i+2), "popularity", "201", "description", "apple is a company too " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+3), "popularity", "201", "description", "new york city is big apple " + randomString(i%6+3), "normHits", "0.11"));
 
-      assertU(adoc("id", Integer.toString(i+6), "popularity", "301", "description", "function name " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+7), "popularity", "301", "description", "function " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+6), "popularity", "301", "description", "function name " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+7), "popularity", "301", "description", "function " + randomString(i%6+3), "normHits", "0.1"));
 
-      assertU(adoc("id", Integer.toString(i+8), "popularity", "301", "description", "This is a sample function for testing " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+9), "popularity", "301", "description", "Function to check out stock prices "+randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+10),"popularity", "301", "description", "Some descriptions "+randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+8), "popularity", "301", "description", "This is a sample function for testing " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+9), "popularity", "301", "description", "Function to check out stock prices "+randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+10),"popularity", "301", "description", "Some descriptions "+randomString(i%6+3), "normHits", "0.1"));
 
-      assertU(adoc("id", Integer.toString(i+11), "popularity", "201", "description", "apple apple is a company " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+12), "popularity", "201", "description", "Big Apple is New York.", "normHits", "0.01"));
-      assertU(adoc("id", Integer.toString(i+13), "popularity", "201", "description", "New some York is Big. "+ randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+11), "popularity", "201", "description", "apple apple is a company " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+12), "popularity", "201", "description", "Big Apple is New York.", "normHits", "0.01"));
+      restTestHarness.update(adoc("id", Integer.toString(i+13), "popularity", "201", "description", "New some York is Big. "+ randomString(i%6+3), "normHits", "0.1"));
 
-      assertU(adoc("id", Integer.toString(i+14), "popularity", "201", "description", "apple apple is a company " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+15), "popularity", "201", "description", "Big Apple is New York.", "normHits", "0.01"));
-      assertU(adoc("id", Integer.toString(i+16), "popularity", "401", "description", "barack h", "normHits", "0.0"));
-      assertU(adoc("id", Integer.toString(i+17), "popularity", "201", "description", "red delicious apple " + randomString(i%6+3), "normHits", "0.1"));
-      assertU(adoc("id", Integer.toString(i+18), "popularity", "201", "description", "nyc " + randomString(i%6+3), "normHits", "0.11"));
+      restTestHarness.update(adoc("id", Integer.toString(i+14), "popularity", "201", "description", "apple apple is a company " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+15), "popularity", "201", "description", "Big Apple is New York.", "normHits", "0.01"));
+      restTestHarness.update(adoc("id", Integer.toString(i+16), "popularity", "401", "description", "barack h", "normHits", "0.0"));
+      restTestHarness.update(adoc("id", Integer.toString(i+17), "popularity", "201", "description", "red delicious apple " + randomString(i%6+3), "normHits", "0.1"));
+      restTestHarness.update(adoc("id", Integer.toString(i+18), "popularity", "201", "description", "nyc " + randomString(i%6+3), "normHits", "0.11"));
     }
 
-    assertU(commit());
+    restTestHarness.update(commit());
 
     loadFeatures("comp_features.json");
+    super.setUp();
   }
 
-  @AfterClass
-  public static void after() throws Exception {
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
     aftertest();
   }
 
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java
index 422dfa6..70a2d2a 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java
@@ -77,7 +77,7 @@ public class TestOriginalScoreFeature extends TestRerankBase {
     implTestOriginalScoreResponseDocsCheck("origScore", "origScore", "c2", "2.0");
   }
 
-  public static void implTestOriginalScoreResponseDocsCheck(String modelName,
+  public void implTestOriginalScoreResponseDocsCheck(String modelName,
       String origScoreFeatureName,
       String nonScoringFeatureName, String nonScoringFeatureValue) throws Exception {
 
@@ -135,7 +135,7 @@ public class TestOriginalScoreFeature extends TestRerankBase {
         nonScoringFeatureName, nonScoringFeatureValue, debugQuery);
   }
 
-  private static void implTestOriginalScoreResponseDocsCheck(String modelName,
+  private void implTestOriginalScoreResponseDocsCheck(String modelName,
       SolrQuery query, int docIdx, int docId,
       String origScoreFeatureName, String origScoreFeatureValue,
       String nonScoringFeatureName, String nonScoringFeatureValue,
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java
index e922382..c9abced 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.ltr.model;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -24,6 +25,7 @@ import java.util.Map;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.ltr.TestRerankBase;
 import org.apache.solr.ltr.feature.Feature;
 import org.apache.solr.ltr.norm.IdentityNormalizer;
@@ -40,12 +42,11 @@ public class TestLinearModel extends TestRerankBase {
   public static LTRScoringModel createLinearModel(String name, List<Feature> features,
       List<Normalizer> norms,
       String featureStoreName, List<Feature> allFeatures,
-      Map<String,Object> params) throws ModelException {
-    final LTRScoringModel model = LTRScoringModel.getInstance(solrResourceLoader,
-        LinearModel.class.getName(),
-        name,
-        features, norms, featureStoreName, allFeatures, params);
-    return model;
+      Map<String,Object> params) throws ModelException, IOException {
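+    // the loader is only needed while instantiating the model, so it can be closed before returning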
+    try (SolrResourceLoader solrResourceLoader = new SolrResourceLoader()) {
+      final LTRScoringModel model = LTRScoringModel.getInstance(solrResourceLoader, LinearModel.class.getName(), name, features, norms, featureStoreName, allFeatures, params);
+      return model;
+    }
   }
 
   public static Map<String,Object> makeFeatureWeights(List<Feature> features) {
@@ -77,7 +78,7 @@ public class TestLinearModel extends TestRerankBase {
   }
   
   @Test
-  public void getInstanceTest() {
+  public void getInstanceTest() throws IOException {
     final Map<String,Object> weights = new HashMap<>();
     weights.put("constant1", 1d);
     weights.put("constant5", 1d);
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestNeuralNetworkModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestNeuralNetworkModel.java
index 682b278..f542744 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestNeuralNetworkModel.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestNeuralNetworkModel.java
@@ -40,7 +40,7 @@ public class TestNeuralNetworkModel extends TestRerankBase {
       List<Normalizer> norms,
       String featureStoreName, List<Feature> allFeatures,
       Map<String,Object> params) throws ModelException {
-    return LTRScoringModel.getInstance(solrResourceLoader,
+    return LTRScoringModel.getInstance(solrConfig.getResourceLoader(),
         NeuralNetworkModel.class.getName(),
         name,
         features, norms, featureStoreName, allFeatures, params);
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestMinMaxNormalizer.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestMinMaxNormalizer.java
index 7627ae9..fb50993 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestMinMaxNormalizer.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestMinMaxNormalizer.java
@@ -16,19 +16,23 @@
  */
 package org.apache.solr.ltr.norm;
 
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.solr.SolrTestCase;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.core.SolrResourceLoader;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-public class TestMinMaxNormalizer {
+public class TestMinMaxNormalizer extends SolrTestCase {
 
-  private final SolrResourceLoader solrResourceLoader = new SolrResourceLoader();
+  private static SolrResourceLoader solrResourceLoader;
 
   private Normalizer implTestMinMax(Map<String,Object> params,
       float expectedMin, float expectedMax) {
@@ -44,6 +48,17 @@ public class TestMinMaxNormalizer {
     return n;
   }
 
+  @BeforeClass
+  public static void beforeClass() {
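+    // one loader shared across the whole class, closed in afterClass rather than leaked per test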
+    solrResourceLoader = new SolrResourceLoader();
+  }
+
+  @AfterClass
+  public static void afterClass() throws IOException {
+    solrResourceLoader.close();
+    solrResourceLoader = null;
+  }
+
   @Test
   public void testInvalidMinMaxNoParams() {
     implTestMinMax(new HashMap<String,Object>(),
@@ -122,8 +137,7 @@ public class TestMinMaxNormalizer {
     n1.setMax(10.0f);
 
     final Map<String,Object> params = n1.paramsToMap();
-    final MinMaxNormalizer n2 = (MinMaxNormalizer) Normalizer.getInstance(
-        new SolrResourceLoader(),
+    final MinMaxNormalizer n2 = (MinMaxNormalizer) Normalizer.getInstance(solrResourceLoader,
         MinMaxNormalizer.class.getName(),
         params);
     assertEquals(n1.getMin(), n2.getMin(), 1e-6);
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestStandardNormalizer.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestStandardNormalizer.java
index 62e415f..bea3d28 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestStandardNormalizer.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/norm/TestStandardNormalizer.java
@@ -16,19 +16,23 @@
  */
 package org.apache.solr.ltr.norm;
 
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.solr.SolrTestCase;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.core.SolrResourceLoader;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-public class TestStandardNormalizer {
+public class TestStandardNormalizer extends SolrTestCase {
 
-  private final SolrResourceLoader solrResourceLoader = new SolrResourceLoader();
+  private static SolrResourceLoader solrResourceLoader;
 
   private Normalizer implTestStandard(Map<String,Object> params,
       float expectedAvg, float expectedStd) {
@@ -44,6 +48,17 @@ public class TestStandardNormalizer {
     return n;
   }
 
+  @BeforeClass
+  public static void beforeClass() {
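+    // same pattern as TestMinMaxNormalizer: a single class-wide loader, closed in afterClass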
+    solrResourceLoader = new SolrResourceLoader();
+  }
+
+  @AfterClass
+  public static void afterClass() throws IOException {
+    solrResourceLoader.close();
+    solrResourceLoader = null;
+  }
+
   @Test
   public void testNormalizerNoParams() {
     implTestStandard(new HashMap<String,Object>(),
@@ -129,7 +144,7 @@ public class TestStandardNormalizer {
 
     final Map<String, Object> params = n1.paramsToMap();
     final StandardNormalizer n2 = (StandardNormalizer) Normalizer.getInstance(
-        new SolrResourceLoader(),
+        solrResourceLoader,
         StandardNormalizer.class.getName(),
         params);
     assertEquals(n1.getAvg(), n2.getAvg(), 1e-6);
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java
index 3e033ba..87a40d1 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManager.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.ltr.store.rest;
 
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.ltr.TestRerankBase;
@@ -27,46 +28,52 @@ import org.apache.solr.ltr.store.FeatureStore;
 import org.apache.solr.rest.ManagedResource;
 import org.apache.solr.rest.ManagedResourceStorage;
 import org.apache.solr.rest.RestManager;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
-@Ignore // nocommit flakey
+@LuceneTestCase.Nightly
 public class TestModelManager extends TestRerankBase {
 
-  @BeforeClass
-  public static void init() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     setuptest(true);
+    super.setUp();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
+    aftertest();
   }
 
   @Test
   public void test() throws Exception {
-    final SolrResourceLoader loader = new SolrResourceLoader(
-        tmpSolrHome.toPath());
-
-    final RestManager.Registry registry = loader.getManagedResourceRegistry();
-    assertNotNull(
-        "Expected a non-null RestManager.Registry from the SolrResourceLoader!",
-        registry);
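+    // scope the loader with try-with-resources so it is closed even if an assertion fails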
+    try (SolrResourceLoader loader = new SolrResourceLoader(
+        tmpSolrHome.toPath())) {
 
-    final String resourceId = "/schema/fstore1";
-    registry.registerManagedResource(resourceId, ManagedFeatureStore.class,
-        new LTRQParserPlugin());
+      final RestManager.Registry registry = loader.getManagedResourceRegistry();
+      assertNotNull(
+              "Expected a non-null RestManager.Registry from the SolrResourceLoader!",
+              registry);
 
-    final String resourceId2 = "/schema/mstore1";
-    registry.registerManagedResource(resourceId2, ManagedModelStore.class,
-        new LTRQParserPlugin());
+      final String resourceId = "/schema/fstore1";
+      registry.registerManagedResource(resourceId, ManagedFeatureStore.class,
+              new LTRQParserPlugin());
 
-    final NamedList<String> initArgs = new NamedList<>();
+      final String resourceId2 = "/schema/mstore1";
+      registry.registerManagedResource(resourceId2, ManagedModelStore.class,
+              new LTRQParserPlugin());
 
-    final RestManager restManager = new RestManager();
-    restManager.init(loader, initArgs,
-        new ManagedResourceStorage.InMemoryStorageIO());
+      final NamedList<String> initArgs = new NamedList<>();
 
-    final ManagedResource res = restManager.getManagedResource(resourceId);
-    assertTrue(res instanceof ManagedFeatureStore);
-    assertEquals(res.getResourceId(), resourceId);
+      final RestManager restManager = new RestManager();
+      restManager.init(loader, initArgs,
+              new ManagedResourceStorage.InMemoryStorageIO());
 
+      final ManagedResource res = restManager.getManagedResource(resourceId);
+      assertTrue(res instanceof ManagedFeatureStore);
+      assertEquals(res.getResourceId(), resourceId);
+    }
   }
 
   @Test
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java
index fb28ff8..7c7f282 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/store/rest/TestModelManagerPersistence.java
@@ -203,7 +203,7 @@ public class TestModelManagerPersistence extends TestRerankBase {
         "/features/==[]");
   }
 
-  private static void doWrapperModelPersistenceChecks(String modelName,
+  private void doWrapperModelPersistenceChecks(String modelName,
       String featureStoreName, String baseModelFileName) throws Exception {
     // note that the wrapper and the wrapped model always have the same name
     assertJQ(ManagedModelStore.REST_END_POINT,
diff --git a/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/collector/SchedulerMetricsCollector.java b/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/collector/SchedulerMetricsCollector.java
index 935301f..9563b07 100644
--- a/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/collector/SchedulerMetricsCollector.java
+++ b/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/collector/SchedulerMetricsCollector.java
@@ -51,7 +51,7 @@ public class SchedulerMetricsCollector implements Closeable {
 
   private final ScheduledThreadPoolExecutor scheduler = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(
       1,
-      new SolrNamedThreadFactory("scheduled-metrics-collector"));
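+      // the added flag presumably marks the factory's threads as daemon so they cannot block shutdown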
+      new SolrNamedThreadFactory("scheduled-metrics-collector", true));
 
   private final Executor executor;
 
diff --git a/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/exporter/SolrExporter.java b/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/exporter/SolrExporter.java
index 782d421..b0dd198 100644
--- a/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/exporter/SolrExporter.java
+++ b/solr/contrib/prometheus-exporter/src/java/org/apache/solr/prometheus/exporter/SolrExporter.java
@@ -216,7 +216,7 @@ public class SolrExporter {
 
   private static MetricsConfiguration loadMetricsConfiguration(Path configPath) {
     try (SolrResourceLoader loader = new SolrResourceLoader(configPath.getParent())) {
-      XmlConfigFile config = new XmlConfigFile(loader, configPath.getFileName().toString(), null, null, null, true);
+      XmlConfigFile config = new XmlConfigFile(loader, configPath.getFileName().toString(), null, null, null);
       return MetricsConfiguration.from(config);
     } catch (Exception e) {
       log.error("Could not load scrape configuration from {}", configPath.toAbsolutePath());
diff --git a/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrCloudScraperTest.java b/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrCloudScraperTest.java
index 0602536..571b540 100644
--- a/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrCloudScraperTest.java
+++ b/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrCloudScraperTest.java
@@ -156,7 +156,7 @@ public class SolrCloudScraperTest extends PrometheusExporterTestBase {
     assertEquals(1, liveNodeSamples.samples.size());
 
     assertEquals(
-        getClusterState().getLiveNodes().size(),
+        cluster.getSolrClient().getZkStateReader().getLiveNodes().size(),
         liveNodeSamples.samples.get(0).value, 0.001);
 
     Collector.MetricFamilySamples shardLeaderSamples = collection1Metrics.get(1);
diff --git a/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrStandaloneScraperTest.java b/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrStandaloneScraperTest.java
index 58dbed9..144a413 100644
--- a/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrStandaloneScraperTest.java
+++ b/solr/contrib/prometheus-exporter/src/test/org/apache/solr/prometheus/scraper/SolrStandaloneScraperTest.java
@@ -19,6 +19,7 @@ package org.apache.solr.prometheus.scraper;
 
 import io.prometheus.client.Collector;
 import org.apache.commons.io.FileUtils;
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.impl.NoOpResponseParser;
 import org.apache.solr.common.ParWork;
@@ -28,8 +29,8 @@ import org.apache.solr.prometheus.collector.MetricSamples;
 import org.apache.solr.prometheus.exporter.MetricsConfiguration;
 import org.apache.solr.prometheus.utils.Helpers;
 import org.apache.solr.util.RestTestBase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -40,22 +41,20 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
 
+@LuceneTestCase.Nightly
 public class SolrStandaloneScraperTest extends RestTestBase {
 
-  private static MetricsConfiguration configuration;
-  private static SolrStandaloneScraper solrScraper;
-  private static ExecutorService executor;
-  private static Http2SolrClient solrClient;
+  private MetricsConfiguration configuration;
+  private SolrStandaloneScraper solrScraper;
+  private ExecutorService executor;
+  private Http2SolrClient solrClient;
 
-  @BeforeClass
-  public static void setupBeforeClass() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     File tmpSolrHome = createTempDir().toFile();
-    tmpSolrHome.deleteOnExit();
 
     FileUtils.copyDirectory(new File(TEST_HOME()), tmpSolrHome.getAbsoluteFile());
 
-    initCore("solrconfig.xml", "managed-schema");
-
     createJettyAndHarness(
         tmpSolrHome.getAbsolutePath(),
         "solrconfig.xml",
@@ -76,10 +75,13 @@ public class SolrStandaloneScraperTest extends RestTestBase {
     solrClient.setParser(responseParser);
 
     Helpers.indexAllDocs(solrClient);
+
+    super.setUp();
   }
 
-  @AfterClass
-  public static void cleanUp() throws Exception {
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
     IOUtils.closeQuietly(solrScraper);
     if (null != executor) {
       executor = null;
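
The SolrStandaloneScraperTest change above trades class-level fixtures (@BeforeClass/@AfterClass on static fields) for per-test fixtures (@Before/@After on instance fields), so each test gets a fresh Jetty, client, and executor instead of sharing mutable static state. A minimal JUnit 4 sketch of the shape of that pattern (the resource type is a placeholder):

    import org.junit.After;
    import org.junit.Before;
    import org.junit.Test;
    import static org.junit.Assert.assertEquals;

    public class PerTestFixtureExample {
      // instance field: rebuilt before every test method, never shared between tests
      private StringBuilder resource;

      @Before
      public void setUp() throws Exception {
        resource = new StringBuilder(); // stands in for jetty/client/executor setup
      }

      @After
      public void tearDown() throws Exception {
        resource = null; // stands in for closeQuietly/shutdown of per-test resources
      }

      @Test
      public void firstTestMutatesItsOwnCopy() {
        resource.append("x");
        assertEquals(1, resource.length());
      }

      @Test
      public void secondTestStillSeesAFreshCopy() {
        assertEquals(0, resource.length()); // would fail with a shared static field
      }
    }
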
diff --git a/solr/core/src/java/org/apache/solr/api/ApiBag.java b/solr/core/src/java/org/apache/solr/api/ApiBag.java
index e67cf45..27439f8 100644
--- a/solr/core/src/java/org/apache/solr/api/ApiBag.java
+++ b/solr/core/src/java/org/apache/solr/api/ApiBag.java
@@ -61,7 +61,7 @@ public class ApiBag {
   private final boolean isCoreSpecific;
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private final Map<String, PathTrie<Api>> apis = new ConcurrentHashMap<>(128, 0.75f, 10);
+  private final Map<String, PathTrie<Api>> apis = new ConcurrentHashMap<>(32, 0.75f, 3);
 
   public ApiBag(boolean isCoreSpecific) {
     this.isCoreSpecific = isCoreSpecific;
@@ -144,7 +144,7 @@ public class ApiBag {
   }
 
   static void registerIntrospect(List<String> l, PathTrie<Api> registry, Map<String, String> substitutes, Api introspect) {
-    ArrayList<String> copy = new ArrayList<>(l.size() + 1);
+    List<String> copy = Collections.synchronizedList(new ArrayList<>(l.size() + 1));
     copy.addAll(l);
     copy.add("_introspect");
     registry.insert(copy, substitutes, introspect);
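
For reference on the ApiBag sizing change above: ConcurrentHashMap's three-argument constructor takes an initial capacity, a load factor, and a concurrency-level hint, so moving from (128, 0.75f, 10) to (32, 0.75f, 3) simply presizes the API trie map for fewer entries and fewer concurrent writers. A small JDK-only illustration:

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    class ConcurrentMapSizingDemo {
      public static void main(String[] args) {
        // initialCapacity=32 : room for ~32 entries before the first resize
        // loadFactor=0.75f   : resize once the table is ~75% full
        // concurrencyLevel=3 : estimated concurrent writers; since JDK 8 this is
        //                      only a sizing hint, not a hard segment count
        Map<String, String> apis = new ConcurrentHashMap<>(32, 0.75f, 3);
        apis.put("/collections", "collections-handler");
        System.out.println(apis.get("/collections"));
      }
    }
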
diff --git a/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java b/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
index 8a18402..5ac7533 100644
--- a/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
+++ b/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
@@ -93,7 +93,7 @@ public class EmbeddedSolrServer extends SolrClient {
    * @param solrHome        the solr home directory
    * @param defaultCoreName the core to route requests to by default (optional)
    */
-  public EmbeddedSolrServer(Path solrHome, String defaultCoreName) {
+  public EmbeddedSolrServer(Path solrHome, String defaultCoreName) throws IOException {
     this(load(new CoreContainer(solrHome, new Properties())), defaultCoreName);
   }
 
diff --git a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
index 66ddc1e..e1d1d6d 100644
--- a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
+++ b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
@@ -21,7 +21,6 @@ import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.cloud.SocketProxy;
 import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.ParWork;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.SolrZkClient;
@@ -29,9 +28,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.ObjectReleaseTracker;
 import org.apache.solr.common.util.SolrQueuedThreadPool;
 import org.apache.solr.common.util.SolrScheduledExecutorScheduler;
-import org.apache.solr.core.CloudConfig;
 import org.apache.solr.core.CoreContainer;
-import org.apache.solr.core.NodeConfig;
 import org.apache.solr.servlet.SolrDispatchFilter;
 import org.apache.solr.servlet.SolrQoSFilter;
 import org.apache.zookeeper.KeeperException;
@@ -88,7 +85,6 @@ import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
@@ -104,7 +100,8 @@ public class JettySolrRunner implements Closeable {
   // NOTE: should be larger than HttpClientUtil.DEFAULT_SO_TIMEOUT or typical client SO timeout
   private static final int THREAD_POOL_MAX_IDLE_TIME_MS = 60000;
 
-  Server server;
+  final Server server;
+  private final ServerConnector connector;
 
   volatile FilterHolder dispatchFilter;
   volatile FilterHolder debugFilter;
@@ -137,8 +134,8 @@ public class JettySolrRunner implements Closeable {
   private volatile boolean isClosed;
 
 
-  private static Scheduler scheduler;
-  private volatile SolrQueuedThreadPool qtp;
+  private final Scheduler scheduler;
+  private final SolrQueuedThreadPool qtp;
   private volatile boolean closed;
 
   public String getContext() {
@@ -284,11 +281,8 @@ public class JettySolrRunner implements Closeable {
       setProxyPort(proxy.getListenPort());
     }
 
-    this.init(this.config.port);
-  }
+    int port = this.config.port;
 
-  private void init(int port) {
-    
     if (config.qtp != null) {
       qtp = config.qtp;
     } else {
@@ -298,9 +292,9 @@ public class JettySolrRunner implements Closeable {
 
     server = new Server(qtp);
 
-    server.setStopTimeout(30000); // will wait gracefull for stoptime / 2, then interrupts
-    assert config.stopAtShutdown;
-    server.setStopAtShutdown(config.stopAtShutdown);
+    server.setStopTimeout(0); // will wait gracefully for stopTimeout / 2, then interrupt
+
+    server.setStopAtShutdown(false);
 
     //if (System.getProperty("jetty.testMode") != null) {
 
@@ -323,7 +317,6 @@ public class JettySolrRunner implements Closeable {
     // https://github.com/eclipse/jetty.project/issues/1891
     configuration.setNotifyRemoteAsyncErrors(false);
 
-    ServerConnector connector;
     if (sslcontext != null) {
       configuration.setSecureScheme("https");
       configuration.addCustomizer(new SecureRequestCustomizer());
@@ -361,7 +354,6 @@ public class JettySolrRunner implements Closeable {
     }
     connector.setIdleTimeout(TimeUnit.MINUTES.toMillis(10));
     connector.setReuseAddress(true);
-    connector.setSoLingerTime(-1);
     connector.setPort(port);
     connector.setHost("127.0.0.1");
 
@@ -369,7 +361,7 @@ public class JettySolrRunner implements Closeable {
 
 
     //server.setDumpAfterStart(true);
-   // server.setDumpBeforeStop(true);
+    //server.setDumpBeforeStop(true);
 
     HandlerWrapper chain;
     {
@@ -388,47 +380,54 @@ public class JettySolrRunner implements Closeable {
 
         @Override
         public void lifeCycleStarting(LifeCycle arg0) {
-
+          if (startedBefore) {
+            root.getServletContext().setAttribute(SolrDispatchFilter.PROPERTIES_ATTRIBUTE, nodeProperties);
+            root.getServletContext().setAttribute(SolrDispatchFilter.SOLRHOME_ATTRIBUTE, solrHome);
+          }
         }
 
         @Override
         public void lifeCycleStarted(LifeCycle arg0) {
 
-          if (log.isDebugEnabled()) log.debug("Jetty loaded and ready to go");
-          root.getServletContext().setAttribute(SolrDispatchFilter.PROPERTIES_ATTRIBUTE, nodeProperties);
-          root.getServletContext().setAttribute(SolrDispatchFilter.SOLRHOME_ATTRIBUTE, solrHome);
-          root.getServletContext().setAttribute(SolrDispatchFilter.INIT_CALL, (Runnable) () -> {
-            jettyPort = getFirstConnectorPort();
-            int port1 = jettyPort;
-            if (proxyPort != -1) port1 = proxyPort;
-            nodeProperties.setProperty("hostPort", String.valueOf(port1));
-
-          });
-
-          debugFilter = root.addFilter(DebugFilter.class, "*", EnumSet.of(DispatcherType.REQUEST));
-          extraFilters = new LinkedList<>();
-          for (Map.Entry<Class<? extends Filter>,String> entry : config.extraFilters.entrySet()) {
-            extraFilters.add(root.addFilter(entry.getKey(), entry.getValue(), EnumSet.of(DispatcherType.REQUEST)));
-          }
+          if (!startedBefore) {
+            startedBefore = true;
 
-          for (Map.Entry<ServletHolder,String> entry : config.extraServlets.entrySet()) {
-            root.addServlet(entry.getKey(), entry.getValue());
-          }
-          dispatchFilter = root.getServletHandler().newFilterHolder(Source.EMBEDDED);
-          dispatchFilter.setHeldClass(SolrDispatchFilter.class);
-          dispatchFilter.setInitParameter("excludePatterns", excludePatterns);
+            root.getServletContext().setAttribute(SolrDispatchFilter.PROPERTIES_ATTRIBUTE, nodeProperties);
+            root.getServletContext().setAttribute(SolrDispatchFilter.SOLRHOME_ATTRIBUTE, solrHome);
+            root.getServletContext().setAttribute(SolrDispatchFilter.INIT_CALL, (Runnable) () -> {
+              jettyPort = getFirstConnectorPort();
+              int port1 = jettyPort;
+              if (proxyPort != -1) port1 = proxyPort;
+              nodeProperties.setProperty("hostPort", String.valueOf(port1));
+
+            });
+
+            debugFilter = root.addFilter(DebugFilter.class, "*", EnumSet.of(DispatcherType.REQUEST));
+            extraFilters = new LinkedList<>();
+            for (Map.Entry<Class<? extends Filter>,String> entry : config.extraFilters.entrySet()) {
+              extraFilters.add(root.addFilter(entry.getKey(), entry.getValue(), EnumSet.of(DispatcherType.REQUEST)));
+            }
+
+            for (Map.Entry<ServletHolder,String> entry : config.extraServlets.entrySet()) {
+              root.addServlet(entry.getKey(), entry.getValue());
+            }
+            dispatchFilter = root.getServletHandler().newFilterHolder(Source.EMBEDDED);
+            dispatchFilter.setHeldClass(SolrDispatchFilter.class);
+            dispatchFilter.setInitParameter("excludePatterns", excludePatterns);
 
-          qosFilter = root.getServletHandler().newFilterHolder(Source.EMBEDDED);
-          qosFilter.setHeldClass(SolrQoSFilter.class);
-          qosFilter.setAsyncSupported(true);
-          root.addFilter(qosFilter, "*", EnumSet.of(DispatcherType.REQUEST, DispatcherType.ASYNC));
+            qosFilter = root.getServletHandler().newFilterHolder(Source.EMBEDDED);
+            qosFilter.setHeldClass(SolrQoSFilter.class);
+            qosFilter.setAsyncSupported(true);
+            root.addFilter(qosFilter, "*", EnumSet.of(DispatcherType.REQUEST, DispatcherType.ASYNC));
 
-          root.addServlet(Servlet404.class, "/*");
+            root.addServlet(Servlet404.class, "/*");
 
-          // Map dispatchFilter in same path as in web.xml
-          dispatchFilter.setAsyncSupported(true);
-          root.addFilter(dispatchFilter, "*", EnumSet.of(DispatcherType.REQUEST, DispatcherType.ASYNC));
+            // Map dispatchFilter in same path as in web.xml
+            dispatchFilter.setAsyncSupported(true);
+            root.addFilter(dispatchFilter, "*", EnumSet.of(DispatcherType.REQUEST, DispatcherType.ASYNC));
 
+            if (log.isDebugEnabled()) log.debug("Jetty loaded and ready to go");
+          }
         }
 
         @Override
@@ -441,11 +440,11 @@ public class JettySolrRunner implements Closeable {
       chain = root;
     }
 
+    // no lifecycle listener for JettySolrRunner; it requires one Jetty per JVM
+    //server.addLifeCycleListener(new SolrLifcycleListener());
+
     chain = injectJettyHandlers(chain);
-    // no shutdown handler for embedded
-//    SolrShutdownHandler shutdownHandler = new SolrShutdownHandler();
-//    shutdownHandler.setHandler(chain);
-//    chain = shutdownHandler;
+
     if(config.enableV2) {
       RewriteHandler rwh = new RewriteHandler();
       rwh.setHandler(chain);
@@ -465,9 +464,10 @@ public class JettySolrRunner implements Closeable {
     gzipHandler.setIncludedMethods("GET");
 
     server.setHandler(gzipHandler);
-   // ShutdownThread.deregister(server);
+    // ShutdownThread.deregister(server);
   }
 
+
   /** descendants may inject own handler chaining it to the given root
    * and then returning that own one*/
   protected HandlerWrapper injectJettyHandlers(HandlerWrapper chain) {
@@ -476,7 +476,7 @@ public class JettySolrRunner implements Closeable {
 
   @Override
   public String toString() {
-    return "JettySolrRunner: " + getBaseUrl();
+    return "JettySolrRunner: " + jettyPort;
   }
 
   /**
@@ -538,14 +538,12 @@ public class JettySolrRunner implements Closeable {
     try {
       int port = reusePort && jettyPort != -1 ? jettyPort : this.config.port;
       log.info("Start Jetty (configured port={}, binding port={})", this.config.port, port);
-
-
-      // if started before, make a new server
       if (startedBefore) {
-        init(port);
-      } else {
-        startedBefore = true;
+        connector.setPort(port);
       }
+
+      // the server is created once and reused across restarts; only the connector port may change
+
       boolean success = false;
       if (!server.isRunning()) {
         if (config.portRetryTime > 0) {
@@ -555,16 +553,9 @@ public class JettySolrRunner implements Closeable {
         }
       }
 
-      if (getCoreContainer() != null) {
-        NodeConfig conf = getCoreContainer().getConfig();
-        CloudConfig cloudConf = conf.getCloudConfig();
-        if (cloudConf != null) {
-          String localHostContext = ZkController.trimLeadingAndTrailingSlashes(cloudConf.getSolrHostContext());
-
-          int localHostPort = cloudConf.getSolrHostPort();
-          String hostName = ZkController.normalizeHostName(cloudConf.getHost());
-          nodeName = ZkController.generateNodeName(hostName, Integer.toString(localHostPort), localHostContext);
-        }
+      if (getCoreContainer() != null && getCoreContainer().isZooKeeperAware()) {
+        nodeName = getCoreContainer().getZkController().getNodeName();
+        if (nodeName == null) throw new IllegalStateException("node name must be set for a ZooKeeper-aware CoreContainer");
       }
 
       setProtocolAndHost();
@@ -608,7 +599,7 @@ public class JettySolrRunner implements Closeable {
 
         ZkStateReader reader = getCoreContainer().getZkController().getZkStateReader();
 
-        reader.waitForLiveNodes(30, TimeUnit.SECONDS, (o, n) -> n != null && getNodeName() != null && n.contains(getNodeName()));
+        reader.waitForLiveNodes(30, TimeUnit.SECONDS, (n) -> n != null && getNodeName() != null && n.contains(getNodeName()));
       }
 
     } finally {
@@ -700,6 +691,8 @@ public class JettySolrRunner implements Closeable {
         log.error("Error stopping jetty server", e);
       }
 
+      scheduler.stop();
+
       if (config.qtp == null) {
         qtp.close();
       }
@@ -711,29 +704,29 @@ public class JettySolrRunner implements Closeable {
         throw new RuntimeException(e);
       }
 
-      if (wait && coreContainer != null && coreContainer
-          .isZooKeeperAware()) {
-        log.info("waitForJettyToStop: {}", getLocalPort());
-        String nodeName = getNodeName();
-        if (nodeName == null) {
-          log.info("Cannot wait for Jetty with null node name");
-        } else {
-
-          log.info("waitForNode: {}", getNodeName());
-
-          ZkStateReader reader = coreContainer.getZkController().getZkStateReader();
-
-          try {
-            if (!reader.isClosed() && reader.getZkClient().isConnected()) {
-              reader.waitForLiveNodes(10, TimeUnit.SECONDS, (o, n) -> !n.contains(nodeName));
-            }
-          } catch (InterruptedException e) {
-            ParWork.propagateInterrupt(e);
-          } catch (TimeoutException e) {
-            log.error("Timeout waiting for live node");
-          }
-        }
-      }
+//      if (wait && coreContainer != null && coreContainer
+//          .isZooKeeperAware()) {
+//        log.info("waitForJettyToStop: {}", getLocalPort());
+//        String nodeName = getNodeName();
+//        if (nodeName == null) {
+//          log.info("Cannot wait for Jetty with null node name");
+//        } else {
+//
+//          log.info("waitForNode: {}", getNodeName());
+//
+//          ZkStateReader reader = coreContainer.getZkController().getZkStateReader();
+//
+//          try {
+//            if (!reader.isClosed() && reader.getZkClient().isConnected()) {
+//              reader.waitForLiveNodes(10, TimeUnit.SECONDS, (n) -> !n.contains(nodeName));
+//            }
+//          } catch (InterruptedException e) {
+//            ParWork.propagateInterrupt(e);
+//          } catch (TimeoutException e) {
+//            log.error("Timeout waiting for live node");
+//          }
+//        }
+//      }
 
     } catch (Exception e) {
       SolrZkClient.checkInterrupted(e);
@@ -840,6 +833,12 @@ public class JettySolrRunner implements Closeable {
             withHttpClient(getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient()).build();
   }
 
+  public SolrClient newClient(String collection) {
+    return new Http2SolrClient.Builder(getBaseUrl() + "/" + collection).
+        withHttpClient(getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient()).build();
+  }
+
+
   public SolrClient newHttp1Client() {
     return new HttpSolrClient.Builder(getBaseUrl()).
         withHttpClient(getCoreContainer().getUpdateShardHandler().getDefaultHttpClient()).build();
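
The JettySolrRunner rework above makes the Server and connector final and guards the heavy servlet/filter registration behind a startedBefore flag inside the LifeCycle listener, so a restart reuses the same server and only re-applies the cheap context attributes. A self-contained sketch of that one-time-init guard against Jetty's listener API (the init bodies are placeholders):

    import org.eclipse.jetty.util.component.AbstractLifeCycle;
    import org.eclipse.jetty.util.component.LifeCycle;

    // One-time-init guard: expensive registration runs only on the first start,
    // so the same Server instance can be stopped and restarted repeatedly.
    class OneTimeInitListener extends AbstractLifeCycle.AbstractLifeCycleListener {
      private volatile boolean startedBefore;

      @Override
      public void lifeCycleStarting(LifeCycle lc) {
        if (startedBefore) {
          // restart path: refresh only cheap, idempotent state (e.g. context attributes)
        }
      }

      @Override
      public void lifeCycleStarted(LifeCycle lc) {
        if (!startedBefore) {
          startedBefore = true;
          // first start only: register filters, servlets, and the dispatch chain
        }
      }
    }
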
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
index d7bb4a5..8995fbf 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
@@ -64,7 +64,12 @@ public class CloudConfigSetService extends ConfigSetService {
 
   @Override
   protected NamedList loadConfigSetFlags(CoreDescriptor cd, SolrResourceLoader loader) {
+    try {
       return ConfigSetProperties.readFromResourceLoader(loader, ".");
+    } catch (Exception ex) {
+      log.debug("No configSet flags", ex);
+      return null;
+    }
   }
 
   @Override
@@ -72,7 +77,7 @@ public class CloudConfigSetService extends ConfigSetService {
     String zkPath = ZkConfigManager.CONFIGS_ZKNODE + "/" + configSet + "/" + schemaFile;
     Stat stat;
     try {
-      stat = zkController.getZkClient().exists(zkPath, null);
+      stat = zkController.getZkClient().exists(zkPath, null, true);
     } catch (KeeperException e) {
       log.warn("Unexpected exception when getting modification time of {}", zkPath, e);
       return null; // debatable; we'll see an error soon if there's a real problem
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java b/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
index 4180280..749f365 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
@@ -38,8 +38,6 @@ public class CloudDescriptor {
   private String roles = null;
   private Integer numShards;
   private Map<String,String> collectionParams = new HashMap<>();
-
-  private volatile boolean isLeader = false;
   
   // set to true once a core has registered in zk
   // set to false on detecting a session expiration
@@ -88,14 +86,6 @@ public class CloudDescriptor {
   public void setLastPublished(Replica.State state) {
     lastPublished = state;
   }
-
-  public boolean isLeader() {
-    return isLeader;
-  }
-  
-  public void setLeader(boolean isLeader) {
-    this.isLeader = isLeader;
-  }
   
   public boolean hasRegistered() {
     return hasRegistered;
@@ -152,7 +142,6 @@ public class CloudDescriptor {
     if (reloadFrom.getNumShards() != null) {
       setNumShards(reloadFrom.getNumShards());
     }
-    setLeader(reloadFrom.isLeader);
     setHasRegistered(reloadFrom.hasRegistered);
     setLastPublished(reloadFrom.getLastPublished());
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
index 3785576..a0f6c51 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
@@ -105,6 +105,25 @@ public class CloudUtil {
     }
   }
 
+  public static boolean checkIfValidCloudCore(CoreContainer cc, CoreDescriptor desc) {
+    if (desc.getCloudDescriptor() == null) return false;
+    ZkController zkController = cc.getZkController();
+    String coreName = desc.getName();
+
+    // if our core is no longer registered as a replica of its collection in the cluster state, unload it
+    final DocCollection docCollection = zkController.getClusterState().getCollectionOrNull(desc.getCloudDescriptor().getCollectionName());
+    if (docCollection == null || docCollection.getReplica(coreName) == null) {
+
+      try {
+        cc.unload(desc.getName(), true, true, true);
+      } catch (Exception e) {
+        log.error("unload exception", e);
+      }
+      return false;
+    }
+    return true;
+  }
+
   /**
    * Returns a displayable unified path to the given resource. For non-solrCloud that will be the
    * same as getConfigDir, but for Cloud it will be getConfigSetZkPath ending in a /
@@ -200,7 +219,7 @@ public class CloudUtil {
         timeout.sleep(100);
         continue;
       }
-      if (predicate.matches(state.getLiveNodes(), coll)) {
+      if (predicate.matches(cloudManager.getClusterStateProvider().getLiveNodes(), coll)) {
         log.trace("-- predicate matched with state {}", state);
         return timeout.timeElapsed(TimeUnit.MILLISECONDS);
       }
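
The new CloudUtil.checkIfValidCloudCore above bundles a cluster-state sanity check with the remedy: if the collection no longer lists this core as a replica, the core is unloaded and false is returned. A hedged sketch of how a container might apply it while scanning local core descriptors on startup (the helper and logging below are illustrative, not code from this commit):

    // Hypothetical startup scan: drop any local core that the cluster state
    // no longer knows about; checkIfValidCloudCore performs the unload itself.
    static void dropStaleCores(CoreContainer cc) {
      for (CoreDescriptor desc : cc.getCoreDescriptors()) {
        if (desc.getCloudDescriptor() == null) {
          continue; // non-cloud cores are out of scope for this check
        }
        if (!CloudUtil.checkIfValidCloudCore(cc, desc)) {
          log.info("Core {} was not found in the cluster state and has been unloaded", desc.getName());
        }
      }
    }
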
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index c1e7bf4..e66622d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -26,13 +26,15 @@ import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public abstract class ElectionContext implements Closeable {
+public abstract class ElectionContext {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   protected final String electionPath;
   protected final Replica leaderProps;
   protected final String id;
   protected final String leaderPath;
   protected volatile String leaderSeqPath;
+  protected volatile String watchedSeqPath;
+
 
   public ElectionContext(final String id, final String electionPath, final String leaderPath, final Replica leaderProps) {
     this.id = id;
@@ -40,11 +42,6 @@ public abstract class ElectionContext implements Closeable {
     this.leaderPath = leaderPath;
     this.leaderProps = leaderProps;
 
-    assert ObjectReleaseTracker.track(this);
-  }
-
-  public void close() {
-    assert ObjectReleaseTracker.release(this);
   }
 
   protected void cancelElection() throws InterruptedException, KeeperException {
diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
index 234208e..a9b0d9f 100644
--- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
+++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
@@ -16,16 +16,6 @@
  */
 package org.apache.solr.cloud;
 
-import java.io.Closeable;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
 import org.apache.solr.cloud.ZkController.ContextKey;
 import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.ParWork;
@@ -34,6 +24,7 @@ import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.SolrZooKeeper;
 import org.apache.solr.common.cloud.ZooKeeperException;
 import org.apache.solr.common.util.IOUtils;
+import org.apache.solr.common.util.ObjectReleaseTracker;
 import org.apache.solr.core.SolrCore;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
@@ -44,11 +35,21 @@ import org.apache.zookeeper.Watcher.Event.EventType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.Closeable;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.Future;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 /**
  * Leader Election process. This class contains the logic by which a
  * leader is chosen. First call {@link #setup(ElectionContext)} to ensure
  * the election process is initialized. Next call
- * {@link #joinElection(ElectionContext, boolean)} to start the leader election.
+ * {@link #joinElection(boolean)} to start the leader election.
  *
  * The implementation follows the classic ZooKeeper recipe of creating an
  * ephemeral, sequential node for each candidate and then looking at the set
@@ -65,6 +66,14 @@ public class LeaderElector implements Closeable {
 
   public final static Pattern LEADER_SEQ = Pattern.compile(".*?/?.*?-n_(\\d+)");
   private final static Pattern SESSION_ID = Pattern.compile(".*?/?(.*?-.*?)-n_\\d+");
+  private static final String JOINED = "j2";
+  private static final String JOIN = "j1";
+  private static final String CHECK_IF_LEADER = "lc";
+  private static final String OUT_OF_ELECTION = "o";
+  private static final String POT_LEADER = "pt";
+  private static final String LEADER = "l";
+  private static final String CLOSED = "c";
+  private static final String WAITING_IN_ELECTION = "w";
 
   protected final SolrZkClient zkClient;
   private final ZkController zkController;
@@ -73,9 +82,11 @@ public class LeaderElector implements Closeable {
 
   private volatile ElectionWatcher watcher;
 
-  private final Map<ContextKey,ElectionContext> electionContexts;
-  private final ContextKey contextKey;
   private volatile boolean isClosed;
+  private volatile Future<?> joinFuture;
+  private volatile boolean isCancelled;
+
+  private volatile String state = OUT_OF_ELECTION;
 
   //  public LeaderElector(SolrZkClient zkClient) {
 //    this.zkClient = zkClient;
@@ -83,12 +94,11 @@ public class LeaderElector implements Closeable {
 //    this.electionContexts = new ConcurrentHashMap<>(132, 0.75f, 50);
 //  }
 
-  public LeaderElector(ZkController zkController, ContextKey key, Map<ContextKey,ElectionContext> electionContexts) {
+  public LeaderElector(ZkController zkController, ContextKey key) {
 
     this.zkClient = zkController.getZkClient();
     this.zkController = zkController;
-    this.electionContexts = electionContexts;
-    this.contextKey = key;
+    assert ObjectReleaseTracker.track(this);
   }
 
   public ElectionContext getContext() {
@@ -103,7 +113,7 @@ public class LeaderElector implements Closeable {
    *
    * @param replacement has someone else been the leader already?
    */
-  private synchronized boolean checkIfIamLeader(final ElectionContext context, boolean replacement) throws KeeperException,
+  private boolean checkIfIamLeader(final ElectionContext context, boolean replacement) throws KeeperException,
           InterruptedException, IOException {
     //if (checkClosed(context)) return false;
 
@@ -116,8 +126,7 @@ public class LeaderElector implements Closeable {
       context.checkIfIamLeaderFired();
     });
 
-    boolean checkAgain = false;
-
+    state = CHECK_IF_LEADER;
     // get all other numbers...
     final String holdElectionPath = context.electionPath + ELECTION_NODE;
     List<String> seqs;
@@ -125,113 +134,143 @@ public class LeaderElector implements Closeable {
       seqs = zkClient.getChildren(holdElectionPath, null, true);
     } catch (KeeperException.SessionExpiredException e) {
       log.error("ZooKeeper session has expired");
+      state = OUT_OF_ELECTION;
       throw e;
     } catch (KeeperException.NoNodeException e) {
       log.info("the election node disappeared, check if we are the leader again");
+      state = OUT_OF_ELECTION;
       return true;
     } catch (KeeperException e) {
       // we couldn't set our watch for some other reason, retry
-      log.info("Failed setting election watch, retrying {} {}", e.getClass().getName(), e.getMessage());
+      log.warn("Failed setting election watch, retrying {} {}", e.getClass().getName(), e.getMessage());
+      state = OUT_OF_ELECTION;
       return true;
     } catch (Exception e) {
       // we couldn't set our watch for some other reason, retry
-      log.info("Failed on election getchildren call {} {}", e.getClass().getName(), e.getMessage());
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      log.error("Failed on election getchildren call {} {}", e.getClass().getName(), e.getMessage());
+      state = OUT_OF_ELECTION;
+      return true;
     }
-    sortSeqs(seqs);
 
-    String leaderSeqNodeName;
     try {
-      leaderSeqNodeName = context.leaderSeqPath.substring(context.leaderSeqPath.lastIndexOf('/') + 1);
-    } catch (NullPointerException e) {
-      if (log.isDebugEnabled()) log.debug("leaderSeqPath has been removed, bailing");
-      return false;
-    }
-    if (!seqs.contains(leaderSeqNodeName)) {
-      log.warn("Our node is no longer in line to be leader");
-      return false;
-    }
 
-    if (leaderSeqNodeName.equals(seqs.get(0))) {
-      // I am the leader
-      if (log.isDebugEnabled()) log.debug("I am the potential leader {}, running leader process", context.leaderProps.getName());
+      sortSeqs(seqs);
 
+      String leaderSeqNodeName;
       try {
-        if (isClosed || (zkController != null && zkController.getCoreContainer().isShutDown())) {
+        leaderSeqNodeName = context.leaderSeqPath.substring(context.leaderSeqPath.lastIndexOf('/') + 1);
+      } catch (NullPointerException e) {
+        state = OUT_OF_ELECTION;
+        if (log.isDebugEnabled()) log.debug("leaderSeqPath has been removed, bailing");
+        return true;
+      }
+      if (!seqs.contains(leaderSeqNodeName)) {
+        log.warn("Our node is no longer in line to be leader");
+        state = OUT_OF_ELECTION;
+        return false;
+      }
+      if (log.isDebugEnabled()) log.debug("The leader election node is {}", leaderSeqNodeName);
+      if (leaderSeqNodeName.equals(seqs.get(0))) {
+        // I am the leader
+        if (log.isDebugEnabled()) log.debug("I am the potential leader {}, running leader process", context.leaderProps.getName());
+        ElectionWatcher oldWatcher = watcher;
+        if (oldWatcher != null) {
+          oldWatcher.close();
+        }
+
+        if ((zkController != null && zkController.getCoreContainer().isShutDown())) {
           if (log.isDebugEnabled()) log.debug("Elector is closed, will not try and run leader processes");
+          state = OUT_OF_ELECTION;
           return false;
         }
+
+        state = POT_LEADER;
         runIamLeaderProcess(context, replacement);
-      } catch (AlreadyClosedException e) {
         return false;
-      } catch (Exception e) {
-        log.error("", e);
-      }
 
-    } else {
-      log.info("I am not the leader (leaderSeqNodeName={}) - watch the node below me {} seqs={}", leaderSeqNodeName, context.getClass().getSimpleName(), seqs);
-      String toWatch = seqs.get(0);
-      for (String node : seqs) {
-        if (leaderSeqNodeName.equals(node)) {
-          break;
-        }
-        toWatch = node;
-      }
-      try {
-        String watchedNode = holdElectionPath + "/" + toWatch;
+      } else {
 
-        ElectionWatcher oldWatcher = watcher;
-        if (oldWatcher != null) {
-          oldWatcher.close();
+        String toWatch = seqs.get(0);
+        for (String node : seqs) {
+          if (leaderSeqNodeName.equals(node)) {
+            break;
+          }
+          toWatch = node;
         }
-        zkClient.exists(watchedNode, watcher = new ElectionWatcher(context.leaderSeqPath,
-            watchedNode, getSeq(context.leaderSeqPath), context));
-        if (log.isDebugEnabled()) log.debug("Watching path {} to know if I could be the leader", watchedNode);
-      } catch (KeeperException.SessionExpiredException e) {
-        log.error("ZooKeeper session has expired");
-        throw e;
-      } catch (KeeperException.NoNodeException e) {
-        log.info("the previous node disappeared, check if we are the leader again");
-        checkAgain = true;
-      } catch (KeeperException e) {
-        // we couldn't set our watch for some other reason, retry
-        log.info("Failed setting election watch, retrying {} {}", e.getClass().getName(), e.getMessage());
-        checkAgain = true;
-      } catch (Exception e) {
-        // we couldn't set our watch for some other reason, retry
-        log.info("Failed setting election watch {} {}", e.getClass().getName(), e.getMessage());
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-      }
+        try {
+          String watchedNode = holdElectionPath + "/" + toWatch;
 
-      if (checkAgain) {
-        return true;
-      }
-    }
-    return false;
-  }
+          log.info("I am not the leader (our path is ={}) - watch the node below me {} seqs={}", leaderSeqNodeName, watchedNode, seqs);
 
-  private boolean checkClosed(ElectionContext context) {
-    if (isClosed || (zkController != null && zkController.getCoreContainer().isShutDown())) {
-      if (log.isDebugEnabled()) log.debug("Will not checkIfIamLeader, elector is closed");
-      return true;
-    }
-    if (zkController != null) {
-      try (SolrCore core = zkController.getCoreContainer().getCore(context.leaderProps.getName())) {
-        if (core != null) {
-          if (core.isClosing()) {
-            if (log.isDebugEnabled()) log.debug("Will not checkIfIamLeader, SolrCore is closed");
-            return true;
+          ElectionWatcher oldWatcher = watcher;
+          if (oldWatcher != null) {
+            IOUtils.closeQuietly(oldWatcher);
           }
+          if (context.leaderSeqPath == null) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Election has been cancelled");
+          }
+          zkClient.exists(watchedNode, watcher = new ElectionWatcher(context.leaderSeqPath, watchedNode, context));
+          state = WAITING_IN_ELECTION;
+          if (log.isDebugEnabled()) log.debug("Watching path {} to know if I could be the leader, my node is {}", watchedNode, context.leaderSeqPath);
+          try (SolrCore core = zkController.getCoreContainer().getCore(context.leaderProps.getName())) {
+            if (core != null) {
+             // if (!core.getSolrCoreState().isRecoverying()) {
+                core.getSolrCoreState().doRecovery(core);
+             // }
+            }
+          }
+          return false;
+        } catch (KeeperException.SessionExpiredException e) {
+          state = OUT_OF_ELECTION;
+          log.error("ZooKeeper session has expired");
+          throw e;
+        } catch (KeeperException.NoNodeException e) {
+          log.info("the previous node disappeared, check if we are the leader again");
+
+        } catch (KeeperException e) {
+          // we couldn't set our watch for some other reason, retry
+          log.warn("Failed setting election watch, retrying {} {}", e.getClass().getName(), e.getMessage());
+
+        } catch (Exception e) {
+          state = OUT_OF_ELECTION;
+          // we couldn't set our watch for some other reason, retry
+          log.error("Failed setting election watch {} {}", e.getClass().getName(), e.getMessage());
+
         }
+
       }
+
+    } catch (KeeperException.SessionExpiredException e) {
+      log.error("ZooKeeper session has expired");
+      state = OUT_OF_ELECTION;
+      throw e;
+    } catch (AlreadyClosedException e) {
+      state = OUT_OF_ELECTION;
+      return false;
+    } catch (Exception e) {
+      state = OUT_OF_ELECTION;
+      return true;
     }
-    return false;
+
+    state = OUT_OF_ELECTION;
+    return true;
   }
 
+
   // TODO: get this core param out of here
   protected void runIamLeaderProcess(final ElectionContext context, boolean weAreReplacement) throws KeeperException,
           InterruptedException, IOException {
+    if (state == CLOSED) {
+      throw new AlreadyClosedException();
+    }
+    if (state == LEADER) {
+      throw new IllegalStateException("Already in leader state");
+    }
+
     context.runLeaderProcess(context, weAreReplacement,0);
+
+
+    state = LEADER;
   }
 
   /**
@@ -269,8 +308,21 @@ public class LeaderElector implements Closeable {
 
   }
 
-  public boolean joinElection(ElectionContext context, boolean replacement) throws KeeperException, InterruptedException, IOException {
-    return joinElection(context,replacement, false);
+  public void joinElection(boolean replacement) {
+    joinElection(replacement, false);
+  }
+
+  public void joinElection(boolean replacement, boolean joinAtHead) {
+    if (!isClosed && !zkController.getCoreContainer().isShutDown() && !zkController.isDcCalled() && zkClient.isAlive()) {
+      joinFuture = ParWork.getRootSharedExecutor().submit(() -> {
+        try {
+          isCancelled = false;
+          doJoinElection(context, replacement, joinAtHead);
+        } catch (Exception e) {
+          log.error("Exception trying to join election", e);
+        }
+      });
+    }
   }
 
   /**
@@ -280,13 +332,21 @@ public class LeaderElector implements Closeable {
    * node that is watched goes down, check if we are the new lowest node, else
    * watch the next lowest numbered node.
    *
-   * @return sequential node number
    */
-  public synchronized boolean joinElection(ElectionContext context, boolean replacement,boolean joinAtHead) throws KeeperException, InterruptedException, IOException {
+  public synchronized void doJoinElection(ElectionContext context, boolean replacement, boolean joinAtHead) throws KeeperException, InterruptedException, IOException {
     //if (checkClosed(context)) return false;
-    if (isClosed) {
+    if (shouldRejectJoins() || state == CLOSED) {
       log.info("elector is closed, won't join election");
+      throw new AlreadyClosedException();
     }
+
+    if (state != OUT_OF_ELECTION) {
+      throw new IllegalStateException("Expected " + OUT_OF_ELECTION + " but got " + state);
+    }
+    state = JOIN;
+
+    isCancelled = false;
+
     ParWork.getRootSharedExecutor().submit(() -> {
       context.joinedElectionFired();
     });
@@ -304,7 +364,7 @@ public class LeaderElector implements Closeable {
           if (log.isDebugEnabled()) log.debug("Node {} trying to join election at the head", id);
           List<String> nodes = OverseerTaskProcessor.getSortedElectionNodes(zkClient, shardsElectZkPath);
           if(nodes.size() <2){
-            leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null,
+            leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", (byte[]) null,
                     CreateMode.EPHEMERAL_SEQUENTIAL, true);
           } else {
             String firstInLine = nodes.get(1);
@@ -315,16 +375,17 @@ public class LeaderElector implements Closeable {
                       + firstInLine);
             }
             leaderSeqPath = shardsElectZkPath + "/" + id + "-n_"+ m.group(1);
-            zkClient.create(leaderSeqPath, null, CreateMode.EPHEMERAL, false);
+            zkClient.create(leaderSeqPath, (byte[]) null, CreateMode.EPHEMERAL, false);
           }
         } else {
           if (log.isDebugEnabled()) log.debug("create ephem election node {}", shardsElectZkPath + "/" + id + "-n_");
-              leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null,
+              leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", (byte[]) null,
                       CreateMode.EPHEMERAL_SEQUENTIAL, false);
         }
 
         log.info("Joined leadership election with path: {}", leaderSeqPath);
         context.leaderSeqPath = leaderSeqPath;
+        state = JOIN;
         cont = false;
       } catch (ConnectionLossException e) {
         // we don't know if we made our node or not...
@@ -363,17 +424,26 @@ public class LeaderElector implements Closeable {
     int seq = getSeq(context.leaderSeqPath);
 
     if (log.isDebugEnabled()) log.debug("Do checkIfIamLeader");
+    boolean tryagain = true;
 
-    boolean tryagain = checkIfIamLeader(context, replacement);
-
-    if (tryagain) {
-      Thread.sleep(100);
+    while (tryagain) {
       tryagain = checkIfIamLeader(context, replacement);
-    }
 
-    if (tryagain) {
-      Thread.sleep(100);
-      checkIfIamLeader(context, replacement);
+      if (tryagain) {
+        try {
+          try (SolrCore core = zkController.getCoreContainer().getCore(context.leaderProps.getName())) {
+            if (core != null) {
+              if (!core.getSolrCoreState().isRecoverying()) {
+                core.getSolrCoreState().doRecovery(core);
+              }
+            }
+          }
+        } catch (Exception e) {
+          log.error("Exception trying to kick off or check for recovery", e);
+        }
+
+      }
+
     }
 
 
@@ -385,21 +455,70 @@ public class LeaderElector implements Closeable {
 //      }
 //    }
 
-    return false;
+  }
+
+  private boolean shouldRejectJoins() {
+    return zkController.getCoreContainer().isShutDown() || zkController.isDcCalled();
   }
 
   @Override
   public void close() throws IOException {
+    assert ObjectReleaseTracker.release(this);
+    state = CLOSED;
+    this.isClosed = true;
+    IOUtils.closeQuietly(watcher);
     if (context != null) {
       try {
         context.cancelElection();
       } catch (Exception e) {
         log.warn("Exception canceling election", e);
       }
-      context.close();
     }
-    IOUtils.closeQuietly(watcher);
-    this.isClosed = true;
+    try {
+      if (joinFuture != null) {
+        joinFuture.cancel(false);
+      }
+    } catch (NullPointerException e) {
+      // okay
+    }
+  }
+
+  public void cancel() {
+
+    if (state == OUT_OF_ELECTION || state == CLOSED) {
+      return;
+    }
+
+    state = OUT_OF_ELECTION;
+
+    try {
+      this.isCancelled = true;
+      IOUtils.closeQuietly(watcher);
+      if (context != null) {
+        context.cancelElection();
+      }
+      Future<?> jf = joinFuture;
+      if (jf != null) {
+        jf.cancel(true);
+//        if (!shouldRejectJoins()) {
+//          try {
+//            jf.get(500, TimeUnit.MILLISECONDS);
+//
+//          } catch (TimeoutException e) {
+//
+//          } catch (Exception e) {
+//            log.error("Exception waiting for previous election attempt to finish {} {} cause={}", e.getClass().getSimpleName(), e.getMessage());
+//          }
+//        }
+
+      }
+    } catch (Exception e) {
+      log.warn("Exception canceling election", e);
+    }
+  }
+
+  public boolean isClosed() {
+    return isClosed;
   }
 
   private class ElectionWatcher implements Watcher, Closeable {
@@ -408,7 +527,7 @@ public class LeaderElector implements Closeable {
 
     private volatile boolean canceled = false;
 
-    private ElectionWatcher(String myNode, String watchedNode, int seq, ElectionContext context) {
+    private ElectionWatcher(String myNode, String watchedNode, ElectionContext context) {
       this.myNode = myNode;
       this.watchedNode = watchedNode;
       this.context = context;
@@ -420,20 +539,23 @@ public class LeaderElector implements Closeable {
       if (EventType.None.equals(event.getType())) {
         return;
       }
-      if (canceled) {
-        if (log.isDebugEnabled()) log.debug("This watcher is not active anymore {}", myNode);
+
+      if (log.isDebugEnabled()) log.debug("Got event on node we where watching in leader line {} watchedNode={}", myNode, watchedNode);
+
+      if (isCancelled || isClosed) {
+        if (log.isDebugEnabled()) log.debug("This watcher is not active anymore {} isCancelled={} isClosed={}", myNode, isCancelled, isClosed);
         return;
       }
       try {
         // am I the next leader?
         boolean tryagain = checkIfIamLeader(context, true);
         if (tryagain) {
-          Thread.sleep(100);
+          Thread.sleep(50);
           tryagain = checkIfIamLeader(context, true);
         }
 
         if (tryagain) {
-          Thread.sleep(100);
+          Thread.sleep(50);
           checkIfIamLeader(context, true);
         }
       } catch (AlreadyClosedException | InterruptedException e) {
@@ -448,17 +570,12 @@ public class LeaderElector implements Closeable {
     @Override
     public void close() throws IOException {
       SolrZooKeeper zk = zkClient.getSolrZooKeeper();
-      if (zk != null) {
-        try {
-          zk.removeWatches(watchedNode, this, WatcherType.Any, true);
-        } catch (KeeperException.NoWatcherException e) {
-          // okay
-        } catch (InterruptedException e) {
-          log.info("Interrupted removing leader watch");
-        } catch (KeeperException e) {
-          log.error("Exception removing leader watch", e);
-        }
+      try {
+        zk.removeWatches(watchedNode, this, WatcherType.Any, true);
+      } catch (Exception e) {
+        log.info("could not remove watch {} {}", e.getClass().getSimpleName(), e.getMessage());
       }
+
       canceled = true;
     }
   }
@@ -467,10 +584,6 @@ public class LeaderElector implements Closeable {
    * Set up any ZooKeeper nodes needed for leader election.
    */
   public void setup(final ElectionContext context) {
-    ElectionContext tmpContext = this.context;
-    if (tmpContext != null) {
-      tmpContext.close();
-    }
     this.context = context;
   }
 
@@ -481,20 +594,16 @@ public class LeaderElector implements Closeable {
     Collections.sort(seqs, Comparator.comparingInt(LeaderElector::getSeq).thenComparing(o -> o));
   }
 
-  synchronized  void retryElection(ElectionContext context, boolean joinAtHead) throws KeeperException, InterruptedException, IOException {
-    ElectionWatcher watcher = this.watcher;
-    if (electionContexts != null) {
-      ElectionContext prevContext = electionContexts.put(contextKey, context);
-      if (prevContext != null) {
-        prevContext.close();
-      }
+  void retryElection(boolean joinAtHead) {
+    if (shouldRejectJoins()) {
+      throw new AlreadyClosedException();
     }
-    if (watcher != null) watcher.close();
-    this.context.close();
-    this.context = context;
-    this.close();
-    isClosed = false;
-    joinElection(context, true, joinAtHead);
+    cancel();
+    ElectionWatcher watcher = this.watcher;
+    IOUtils.closeQuietly(watcher);
+    IOUtils.closeQuietly(this);
+    isCancelled = false;
+    joinElection(true, joinAtHead);
   }
 
 }
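
All of the LeaderElector state handling above implements the classic ZooKeeper election recipe referenced in the class Javadoc: every candidate creates an ephemeral sequential znode, the lowest sequence number leads, and each follower watches only the znode immediately ahead of it, which avoids a thundering herd when the leader drops. A bare-bones sketch against the raw ZooKeeper client API, independent of Solr's wrappers:

    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.ZooDefs;
    import org.apache.zookeeper.ZooKeeper;

    import java.util.Collections;
    import java.util.List;

    class ElectionRecipeSketch {
      // Returns true if we are the leader; otherwise leaves a watch on our predecessor.
      static boolean tryLead(ZooKeeper zk, String electionDir) throws Exception {
        // 1. create our ephemeral, sequential candidate node
        String me = zk.create(electionDir + "/n_", new byte[0],
            ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
        String myNode = me.substring(me.lastIndexOf('/') + 1);

        // 2. sort all candidates by sequence suffix (zero-padded, so string order works)
        List<String> seqs = zk.getChildren(electionDir, false);
        Collections.sort(seqs);

        // 3. lowest sequence wins
        if (myNode.equals(seqs.get(0))) {
          return true; // run the leader process
        }

        // 4. watch only the node directly ahead of us; when it goes away,
        //    re-run this check (no thundering herd on the leader node itself)
        String predecessor = seqs.get(seqs.indexOf(myNode) - 1);
        zk.exists(electionDir + "/" + predecessor,
            event -> { /* re-invoke tryLead on NodeDeleted */ });
        return false;
      }
    }
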
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index 861083c..8c79436 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -54,6 +54,7 @@ import org.apache.solr.update.UpdateShardHandler;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -72,6 +73,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.BiConsumer;
 
@@ -146,9 +148,6 @@ public class Overseer implements SolrCloseable {
   public static final String OVERSEER_COLLECTION_MAP_RUNNING = "/overseer/collection-map-running";
 
 
-  // System properties are used in tests to make them run fast
-  public static final int STATE_UPDATE_DELAY = ZkStateReader.STATE_UPDATE_DELAY;
-  public static final int STATE_UPDATE_BATCH_SIZE = Integer.getInteger("solr.OverseerStateUpdateBatchSize", 10000);
   public static final int STATE_UPDATE_MAX_QUEUE = 20000;
 
   public static final int NUM_RESPONSES_TO_STORE = 10000;
@@ -283,7 +282,6 @@ public class Overseer implements SolrCloseable {
     OUR_JVM_OVERSEER = this;
 
   //  doClose();
-    closed = false;
 
     MDCLoggingContext.setNode(zkController == null ?
         null :
@@ -350,6 +348,8 @@ public class Overseer implements SolrCloseable {
     queueWatcher = new WorkQueueWatcher(getCoreContainer());
     collectionQueueWatcher = new WorkQueueWatcher.CollectionWorkQueueWatcher(getCoreContainer(), id, overseerLbClient, adminPath, stats, Overseer.this);
 
+
+    closed = false;
     // TODO: don't track for a moment, can leak out of collection api tests
     // assert ObjectReleaseTracker.track(this);
   }
@@ -367,7 +367,7 @@ public class Overseer implements SolrCloseable {
     // check that all shard leaders are active
     boolean allActive = true;
     for (Slice s : coll.getActiveSlices()) {
-      if (s.getLeader() == null || !s.getLeader().isActive(clusterState.getLiveNodes())) {
+      if (s.getLeader() == null || !s.getLeader().isActive(zkController.getZkStateReader().getLiveNodes())) {
         allActive = false;
         break;
       }
@@ -479,21 +479,30 @@ public class Overseer implements SolrCloseable {
 
 
   public void closeAndDone() {
-    this.closeAndDone = true;
-
+    synchronized (this) {
+      this.closed = true;
+      this.closeAndDone = true;
+    }
+    close();
   }
 
   public boolean isCloseAndDone() {
     return closeAndDone;
   }
 
-  public synchronized void close() {
+  public void close() {
     log.info("Overseer (id={}) closing closeAndDone={}}", id, closeAndDone);
 
-    closed = true;
+    boolean cd = closeAndDone;
+
+    if (cd) {
+      if (taskExecutor != null) {
+        taskExecutor.shutdown();
+      }
+    }
 
-    log.info("doClose - closeAndDone={}", closeAndDone);
-    this.zkStateWriter  = null;
+    OUR_JVM_OVERSEER = null;
+    closed = true;
 
     if (queueWatcher != null) {
       queueWatcher.close();
@@ -503,29 +512,24 @@ public class Overseer implements SolrCloseable {
       collectionQueueWatcher.close();
     }
 
+    this.zkStateWriter = null;
+    if (!cd) {
+      boolean retry;
+      synchronized (this) {
+        retry = !zkController.getCoreContainer().isShutDown() && !zkController.isShudownCalled() && !zkController.isClosed() && !closeAndDone;
+      }
+      if (retry) {
+        log.info("rejoining the overseer election after closing");
+        zkController.rejoinOverseerElection(false);
+      }
 
-    //    if (stateManagmentExecutor != null) {
-    //      log.info("shutdown stateManagmentExecutor");
-    //      stateManagmentExecutor.shutdown();
-    //    }
-    //
-
-    //    if (stateManagmentExecutor != null) {
-    //      stateManagmentExecutor.shutdownNow();
-    //    }
-
-    //   ExecutorUtil.shutdownAndAwaitTermination(stateManagmentExecutor );
-
+    }
 
-    if (taskExecutor != null) {
-      try {
+    if (cd) {
+      if (taskExecutor != null && !taskExecutor.isShutdown()) {
         taskExecutor.shutdown();
-      } catch (NullPointerException okay) {
-        // okay
       }
-    }
 
-    if (closeAndDone) {
       if (overseerOnlyClient != null) {
         overseerOnlyClient.disableCloseLock();
       }
@@ -539,21 +543,24 @@ public class Overseer implements SolrCloseable {
         overseerOnlyClient.close();
         overseerOnlyClient = null;
       }
-    }
-    ExecutorUtil.shutdownAndAwaitTermination(taskExecutor );
 
-    if (!closeAndDone) {
-      if (!zkController.getCoreContainer().isShutDown() && !zkController.isShudownCalled() && !zkController.isClosed()) {
-        log.info("rejoining the overseer election after closing");
-        zkController.rejoinOverseerElection( false);
+      if (taskExecutor != null && taskExecutor.isShutdown() && !taskExecutor.isTerminated()) {
+        try {
+          taskExecutor.awaitTermination(5, TimeUnit.SECONDS);
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt(); // restore the interrupt flag before the forced shutdown below
+        }
+
+        taskExecutor.shutdownNow();
+        ExecutorUtil.shutdownAndAwaitTermination(taskExecutor);
       }
+
     }
 
     if (log.isDebugEnabled()) {
       log.debug("doClose - end");
     }
 
-    OUR_JVM_OVERSEER = null;
     assert ObjectReleaseTracker.release(this);
   }
 
@@ -718,7 +725,7 @@ public class Overseer implements SolrCloseable {
   public boolean processQueueItem(ZkNodeProps message) throws InterruptedException {
     if (log.isDebugEnabled()) log.debug("processQueueItem {}", message);
     // nocommit - may not need this now
-   new OverseerTaskExecutorTask(getCoreContainer(), zkStateWriter, message).run();
+    new OverseerTaskExecutorTask(getCoreContainer(), zkStateWriter, message).run();
 //    try {
 //      future.get();
 //    } catch (ExecutionException e) {
@@ -729,9 +736,11 @@ public class Overseer implements SolrCloseable {
   }
 
   public void writePendingUpdates() {
-    if (log.isDebugEnabled()) log.debug("writePendingUpdates");
-
-    new OverseerTaskExecutorTask.WriteTask(getCoreContainer(), zkStateWriter).run();
+    if (zkStateWriter != null) {
+      new OverseerTaskExecutorTask.WriteTask(getCoreContainer(), zkStateWriter).run();
+    } else {
+      throw new AlreadyClosedException("No longer the Overseer, won't write pending updates");
+    }
   }
 
   private static abstract class QueueWatcher implements Watcher, Closeable {
@@ -750,7 +759,7 @@ public class Overseer implements SolrCloseable {
       this.path = path;
       List<String> items = setWatch();
       log.info("Overseer found entries on start {}", items);
-      processQueueItems(items);
+      processQueueItems(items, true);
     }
 
     private List<String> setWatch() {
@@ -792,7 +801,7 @@ public class Overseer implements SolrCloseable {
         try {
           List<String> items = setWatch();
           if (items.size() > 0) {
-            processQueueItems(items);
+            processQueueItems(items, false);
           }
         } catch (Exception e) {
           log.error("Exception during overseer queue queue processing", e);
@@ -803,7 +812,7 @@ public class Overseer implements SolrCloseable {
 
     }
 
-    protected abstract void processQueueItems(List<String> items) throws KeeperException;
+    protected abstract void processQueueItems(List<String> items, boolean onStart) throws KeeperException;
 
     @Override
     public void close() {
@@ -813,7 +822,7 @@ public class Overseer implements SolrCloseable {
         try {
           zkController.getZkClient().getSolrZooKeeper().removeWatches(path, this, WatcherType.Data, true);
         } catch (Exception e) {
-          log.info("", e.getMessage());
+          log.info("could not remove watch {} {}", e.getClass().getSimpleName(), e.getMessage());
         }
       } finally {
         ourLock.unlock();
@@ -828,7 +837,7 @@ public class Overseer implements SolrCloseable {
     }
 
     @Override
-    protected void processQueueItems(List<String> items) {
+    protected void processQueueItems(List<String> items, boolean onStart) {
       ourLock.lock();
       try {
         log.info("Found state update queue items {}", items);
@@ -850,11 +859,10 @@ public class Overseer implements SolrCloseable {
         }
 
         overseer.writePendingUpdates();
-
         zkController.getZkClient().delete(fullPaths, true);
 
-      } finally {
 
+      } finally {
         ourLock.unlock();
       }
     }
@@ -885,7 +893,7 @@ public class Overseer implements SolrCloseable {
       }
 
       @Override
-      protected void processQueueItems(List<String> items) {
+      protected void processQueueItems(List<String> items, boolean onStart) {
 
         ourLock.lock();
         try {
@@ -897,15 +905,9 @@ public class Overseer implements SolrCloseable {
 
           Map<String,byte[]> data = zkController.getZkClient().getData(fullPaths);
 
-          try {
-            zkController.getZkClient().delete(fullPaths, true);
-          } catch (Exception e) {
-            log.warn("Exception deleting processed zk nodes", e);
-          }
-
           overseer.getTaskExecutor().submit(() -> {
             try {
-              runAsync(items, fullPaths, data);
+              runAsync(items, fullPaths, data, onStart);
             } catch (Exception e) {
               log.error("failed processing collection queue items " + items, e);
             }
@@ -916,68 +918,109 @@ public class Overseer implements SolrCloseable {
 
       }
 
-      private void runAsync(List<String> items, List<String> fullPaths, Map<String,byte[]> data) {
-        for (Map.Entry<String,byte[]> entry : data.entrySet()) {
-          try {
-            byte[] item = entry.getValue();
-            if (item == null) {
-              log.error("empty item {}", entry.getKey());
-              continue;
-            }
-
-            final ZkNodeProps message = ZkNodeProps.load(item);
-            try {
-              String operation = message.getStr(Overseer.QUEUE_OPERATION);
-              if (operation == null) {
-                log.error("Msg does not have required " + Overseer.QUEUE_OPERATION + ": {}", message);
-                continue;
-              }
+      private void runAsync(List<String> items, List<String> fullPaths, Map<String,byte[]> data, boolean onStart) {
+        if (configMessageHandler == null) {
+          return;
+        }
+        ZkStateWriter zkWriter = overseer.getZkStateWriter();
+        if (zkWriter == null) {
+          log.warn("Overseer appears closed");
+          throw new AlreadyClosedException();
+        }
 
-              final String asyncId = message.getStr(ASYNC);
+        try (ParWork work = new ParWork(this, false, true)) {
+          for (Map.Entry<String,byte[]> entry : data.entrySet()) {
+            work.collect("", ()->{
+              try {
+                byte[] item = entry.getValue();
+                if (item == null) {
+                  log.error("empty item {}", entry.getKey());
+                  zkController.getZkClient().delete(entry.getKey(), -1);
+                  return;
+                }
+                String responsePath = Overseer.OVERSEER_COLLECTION_MAP_COMPLETED + "/" + OverseerTaskQueue.RESPONSE_PREFIX + entry.getKey().substring(entry.getKey().lastIndexOf("-") + 1);
 
-              OverseerSolrResponse response;
-              if (operation != null && operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
-                response = configMessageHandler.processMessage(message, operation);
-              } else {
-                response = collMessageHandler.processMessage(message, operation);
-              }
+                final ZkNodeProps message = ZkNodeProps.load(item);
+                zkController.getZkClient().delete(entry.getKey(), -1);
+                try {
+                  String operation = message.getStr(Overseer.QUEUE_OPERATION);
+
+                  if (onStart) {
+                    log.info("Found operation on start {} {}", responsePath, message);
+
+                    Stat stat = zkController.getZkClient().exists(responsePath, null);
+                    if (stat != null && stat.getDataLength() == 0) {
+                      log.info("Found response and no data on start for {} {}", message, responsePath);
+
+                      OverseerSolrResponse rsp = collMessageHandler.processMessage(message, "cleanup", zkWriter);
+                      if (rsp == null) {
+                      //  zkController.getZkClient().delete(entry.getKey(), -1);
+                        log.info("Set response data since operation looked okay {} {}", message, responsePath);
+                        NamedList response = new NamedList();
+                        response.add("success", true);
+                        OverseerSolrResponse osr = new OverseerSolrResponse(response);
+                        byte[] sdata = OverseerSolrResponseSerializer.serialize(osr);
+                        zkController.getZkClient().setData(responsePath, sdata, true);
+                        return;
+                      } else {
+                        log.info("Tried to cleanup partially executed cmd {} {}", message, responsePath);
+                      }
+                    }
+                  }
 
-              //          try {
-              //            overseer.writePendingUpdates();
-              //          } catch (InterruptedException e) {
-              //            log.error("Overseer state update queue processing interrupted");
-              //            return;
-              //          }
+                  if (operation == null) {
+                    log.error("Msg does not have required " + Overseer.QUEUE_OPERATION + ": {}", message);
+                    return;
+                  }
 
-              if (log.isDebugEnabled()) log.debug("response {}", response);
+                  final String asyncId = message.getStr(ASYNC);
 
-              if (asyncId != null) {
-                if (response != null && (response.getResponse().get("failure") != null || response.getResponse().get("exception") != null)) {
-                  if (log.isDebugEnabled()) {
-                    log.debug("Updated failed map for task with id:[{}]", asyncId);
+                  OverseerSolrResponse response;
+                  if (operation != null && operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
+                    response = configMessageHandler.processMessage(message, operation, zkWriter);
+                  } else {
+                    response = collMessageHandler.processMessage(message, operation, zkWriter);
                   }
-                  failureMap.put(asyncId, OverseerSolrResponseSerializer.serialize(response));
-                } else {
-                  if (log.isDebugEnabled()) {
-                    log.debug("Updated completed map for task with zkid:[{}]", asyncId);
+
+                  //          try {
+                  //            overseer.writePendingUpdates();
+                  //          } catch (InterruptedException e) {
+                  //            log.error("Overseer state update queue processing interrupted");
+                  //            return;
+                  //          }
+
+                  if (log.isDebugEnabled()) log.debug("response {}", response);
+
+                  if (asyncId != null) {
+                    if (response != null && (response.getResponse().get("failure") != null || response.getResponse().get("exception") != null)) {
+                      if (log.isDebugEnabled()) {
+                        log.debug("Updated failed map for task with id:[{}]", asyncId);
+                      }
+                      failureMap.put(asyncId, OverseerSolrResponseSerializer.serialize(response));
+                    } else {
+                      if (log.isDebugEnabled()) {
+                        log.debug("Updated completed map for task with zkid:[{}]", asyncId);
+                      }
+                      completedMap.put(asyncId, OverseerSolrResponseSerializer.serialize(response));
+
+                    }
+                  } else {
+                    byte[] sdata = OverseerSolrResponseSerializer.serialize(response);
+                    zkController.getZkClient().setData(responsePath, sdata, true);
+                    //completedMap.put(entry.getKey().substring(entry.getKey().lastIndexOf("-") + 1), sdata);
+                    log.info("Completed task:[{}] {} {}", message, response.getResponse(), responsePath);
                   }
-                  completedMap.put(asyncId, OverseerSolrResponseSerializer.serialize(response));
 
+                } catch (InterruptedException e) {
+                  log.error("Overseer state update queue processing interrupted");
+                  return;
                 }
-              } else {
-                byte[] sdata = OverseerSolrResponseSerializer.serialize(response);
-                String responsePath = Overseer.OVERSEER_COLLECTION_MAP_COMPLETED + "/" + OverseerTaskQueue.RESPONSE_PREFIX + entry.getKey().substring(entry.getKey().lastIndexOf("-") + 1);
-                zkController.getZkClient().setData(responsePath, sdata, true);
-                log.debug("Completed task:[{}] {}", message, response.getResponse());
-              }
 
-            } catch (InterruptedException e) {
-              log.error("Overseer state update queue processing interrupted");
-              return;
-            }
+              } catch (Exception e) {
+                log.warn("Exception deleting processed zk nodes", e);
+              }
+            });
 
-          } catch (Exception e) {
-            log.warn("Exception deleting processed zk nodes", e);
           }
         }
       }
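
The reworked runAsync above fans work out per queue entry and records results
on per-request response nodes. A rough, self-contained sketch of that
pattern, using a plain executor and an in-memory map where the real code uses
ParWork and ZooKeeper (all names here are illustrative):

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ExecutorService;

    class QueueFanOutSketch {
      // stands in for the ZK tree: path -> data
      static final Map<String, byte[]> store = new ConcurrentHashMap<>();

      static void processItems(Map<String, byte[]> items, ExecutorService pool) {
        for (Map.Entry<String, byte[]> entry : items.entrySet()) {
          pool.submit(() -> {
            String requestPath = entry.getKey();
            store.remove(requestPath);          // consume the request node up front
            byte[] response = handle(entry.getValue());
            // response node keyed by the request's sequence suffix
            String responsePath = "/completed/qnr-"
                + requestPath.substring(requestPath.lastIndexOf('-') + 1);
            store.put(responsePath, response);  // callers watch this node for data
          });
        }
      }

      static byte[] handle(byte[] message) { return message; }
    }
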
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java
index c70048d..c1fbfa3 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java
@@ -27,6 +27,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.solr.cloud.overseer.ZkStateWriter;
 import org.apache.solr.common.ParWork;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
@@ -91,7 +92,7 @@ public class OverseerConfigSetMessageHandler implements OverseerMessageHandler {
   }
 
   @Override
-  public OverseerSolrResponse processMessage(ZkNodeProps message, String operation) {
+  public OverseerSolrResponse processMessage(ZkNodeProps message, String operation, ZkStateWriter zkStateWriter) {
     NamedList results = new NamedList();
     try {
       if (!operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
@@ -306,13 +307,15 @@ public class OverseerConfigSetMessageHandler implements OverseerMessageHandler {
 
     String propertyPath = ConfigSetProperties.DEFAULT_FILENAME;
     Map<String, Object> props = getNewProperties(message);
-    if (props != null) {
-      // read the old config properties and do a merge, if necessary
-      NamedList oldProps = getConfigSetProperties(getPropertyPath(baseConfigSetName, propertyPath));
-      if (oldProps != null) {
-        mergeOldProperties(props, oldProps);
-      }
+    if (props == null) {
+      props = new HashMap<>();
+    }
+    // read the old config properties and do a merge, if necessary
+    NamedList oldProps = getConfigSetProperties(getPropertyPath(baseConfigSetName, propertyPath));
+    if (oldProps != null) {
+      mergeOldProperties(props, oldProps);
     }
+
     byte[] propertyData = getPropertyData(props);
 
     Set<String> copiedToZkPaths = new HashSet<String>();
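
The change above always merges the base config set's old properties into the
(possibly empty) new ones. Assuming mergeOldProperties keeps values already
present in the new map, the merge reduces to something like this sketch:

    import java.util.HashMap;
    import java.util.Map;

    class ConfigSetPropsSketch {
      // new props win; old props fill in anything the message did not set
      static Map<String, Object> merge(Map<String, Object> newProps, Map<String, Object> oldProps) {
        Map<String, Object> merged = newProps == null ? new HashMap<>() : new HashMap<>(newProps);
        if (oldProps != null) {
          oldProps.forEach(merged::putIfAbsent);
        }
        return merged;
      }
    }
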
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
index 8c2a8bf..a56e2ea 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
@@ -35,7 +35,6 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private final SolrZkClient zkClient;
   private final Overseer overseer;
-  private volatile boolean isClosed = false;
 
   public OverseerElectionContext(final String zkNodeName, SolrZkClient zkClient, Overseer overseer) {
     super(zkNodeName, Overseer.OVERSEER_ELECT, Overseer.OVERSEER_ELECT + "/leader", new Replica(overseer.getZkController().getNodeName(), getIDMap(zkNodeName, overseer), null, null, overseer.getZkStateReader()), zkClient);
@@ -86,7 +85,6 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
       overseer.start(id, context);
     } else {
       log.info("Will not start Overseer because we are closed");
-      cancelElection();
     }
 
   }
@@ -97,38 +95,28 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
 
   @Override
   public void cancelElection() throws InterruptedException, KeeperException {
-    try (ParWork closer = new ParWork(this, true)) {
-      if (zkClient.isConnected()) {
-        closer.collect("cancelElection", () -> {
-          try {
-            super.cancelElection();
-          } catch (Exception e) {
-            ParWork.propagateInterrupt(e);
-            log.error("Exception closing Overseer", e);
-          }
-        });
-      }
-      closer.collect("overseer", () -> {
-        try {
-          overseer.close();
-        } catch (Exception e) {
-          ParWork.propagateInterrupt(e);
-          log.error("Exception closing Overseer", e);
-        }
-      });
-    }
-  }
+    //try (ParWork closer = new ParWork(this, true)) {
 
-  @Override
-  public void close() {
-    this.isClosed  = true;
+    //    closer.collect("cancelElection", () -> {
     try {
-      cancelElection();
+      super.cancelElection();
     } catch (Exception e) {
       ParWork.propagateInterrupt(e);
-      log.error("Exception canceling election", e);
+      log.error("Exception closing Overseer", e);
     }
-    super.close();
+    //  });
+
+    //      closer.collect("overseer", () -> {
+    //        try {
+    //          if (!overseer.isCloseAndDone()) {
+    //            overseer.close();
+    //          }
+    //        } catch (Exception e) {
+    //          ParWork.propagateInterrupt(e);
+    //          log.error("Exception closing Overseer", e);
+    //        }
+    //      });
+    //   }
   }
 
   @Override
@@ -140,10 +128,5 @@ final class OverseerElectionContext extends ShardLeaderElectionContextBase {
   public void checkIfIamLeaderFired() {
 
   }
-
-  @Override
-  public boolean isClosed() {
-    return isClosed || !zkClient.isConnected();
-  }
 }
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerMessageHandler.java
index a01cc10..891040f 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerMessageHandler.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.cloud;
 
+import org.apache.solr.cloud.overseer.ZkStateWriter;
 import org.apache.solr.common.cloud.ZkNodeProps;
 
 import java.io.Closeable;
@@ -31,7 +32,7 @@ public interface OverseerMessageHandler extends Closeable {
    *
    * @return response
    */
-  OverseerSolrResponse processMessage(ZkNodeProps message, String operation) throws InterruptedException;
+  OverseerSolrResponse processMessage(ZkNodeProps message, String operation, ZkStateWriter zkStateWriter) throws InterruptedException;
 
   /**
    * @return the name of the OverseerMessageHandler
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskExecutorTask.java b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskExecutorTask.java
index 5495d01..b9f99f1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskExecutorTask.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskExecutorTask.java
@@ -6,6 +6,7 @@ import org.apache.solr.cloud.overseer.OverseerAction;
 import org.apache.solr.cloud.overseer.ReplicaMutator;
 import org.apache.solr.cloud.overseer.SliceMutator;
 import org.apache.solr.cloud.overseer.ZkStateWriter;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkNodeProps;
@@ -52,7 +53,11 @@ public class OverseerTaskExecutorTask implements Runnable {
     if (log.isDebugEnabled()) log.debug("Process message {} {}", message, operation);
 
     if (log.isDebugEnabled()) log.debug("Enqueue message {}", operation);
-    zkStateWriter.enqueueUpdate(null, message, true);
+    try {
+      zkStateWriter.enqueueUpdate(null, message, true);
+    } catch (NullPointerException e) {
+      log.info("Overseer is stopped, won't process message");
+    }
 
 
     if (log.isDebugEnabled()) log.debug("State update consumed from queue {}", message);
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
index ccef59e..9adc30d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
@@ -19,9 +19,9 @@ package org.apache.solr.cloud;
 import com.codahale.metrics.Timer;
 import com.google.common.collect.ImmutableSet;
 import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
+import org.apache.solr.cloud.overseer.ZkStateWriter;
 import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.ParWork;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.util.IOUtils;
@@ -164,7 +164,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
             sessionExpired.set(true);
           } catch (InterruptedException e) {
             interrupted.set(true);
-          } catch (KeeperException e) {
+          } catch (Exception e) {
             log.error("Exception removing item from workQueue", e);
           }
           runningTasks.remove(entry.getKey());
@@ -173,14 +173,6 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
 
     }
 
-    if (interrupted.get()) {
-      Thread.currentThread().interrupt();
-      throw new InterruptedException();
-    }
-
-    if (sessionExpired.get()) {
-      throw new KeeperException.SessionExpiredException();
-    }
   }
 
   public void closing() {
@@ -201,22 +193,22 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
     IOUtils.closeQuietly(selector);
 
 
-    if (closeAndDone) {
-      // nocommit
-      //      for (Future future : taskFutures.values()) {
-      //        future.cancel(false);
-      //      }
-      for (Future future : taskFutures.values()) {
-        try {
-          future.get(1, TimeUnit.SECONDS);
-        } catch (InterruptedException e) {
-          ParWork.propagateInterrupt(e);
-          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-        } catch (Exception e) {
-          log.info("Exception closing Overseer {} {}", e.getClass().getName(), e.getMessage());
-        }
-      }
-    }
+//    if (closeAndDone) {
+//      // nocommit
+//      //      for (Future future : taskFutures.values()) {
+//      //        future.cancel(false);
+//      //      }
+//      for (Future future : taskFutures.values()) {
+//        try {
+//          future.get(1, TimeUnit.SECONDS);
+//        } catch (InterruptedException e) {
+//          ParWork.propagateInterrupt(e);
+//          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+//        } catch (Exception e) {
+//          log.info("Exception closing Overseer {} {}", e.getClass().getName(), e.getMessage());
+//        }
+//      }
+//    }
 
   }
 
@@ -274,16 +266,18 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
     final ZkNodeProps message;
     final String operation;
     private final OverseerMessageHandler.Lock lock;
+    private final ZkStateWriter zkStateWriter;
     volatile OverseerSolrResponse response;
     final QueueEvent head;
     final OverseerMessageHandler messageHandler;
 
-    public Runner(OverseerMessageHandler messageHandler, ZkNodeProps message, String operation, QueueEvent head, OverseerMessageHandler.Lock lock) {
+    public Runner(OverseerMessageHandler messageHandler, ZkNodeProps message, String operation, QueueEvent head, OverseerMessageHandler.Lock lock, ZkStateWriter zkStateWriter) {
       this.message = message;
       this.operation = operation;
       this.head = head;
       this.messageHandler = messageHandler;
       this.lock = lock;
+      this.zkStateWriter = zkStateWriter;
     }
 
 
@@ -299,7 +293,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
           if (log.isDebugEnabled()) {
             log.debug("Runner processing {}", head.getId());
           }
-          response = messageHandler.processMessage(message, operation);
+
+          response = messageHandler.processMessage(message, operation, zkStateWriter);
         } finally {
           timerContext.stop();
           updateStats(statsName);
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskQueue.java b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskQueue.java
index a4d0b61..468b102 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskQueue.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskQueue.java
@@ -16,22 +16,13 @@
  */
 package org.apache.solr.cloud;
 
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.locks.Condition;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantLock;
-import java.util.function.Predicate;
-
 import com.codahale.metrics.Timer;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.util.Pair;
+import org.apache.solr.common.util.IOUtils;
+import org.apache.solr.common.util.TimeOut;
+import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
@@ -41,6 +32,18 @@ import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.Closeable;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
 /**
  * A {@link ZkDistributedQueue} augmented with helper methods specific to the overseer task queues.
  * Methods specific to this subclass ignore superclass internal state and hit ZK directly.
@@ -63,17 +66,6 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
     super(zookeeper, dir, stats);
   }
 
-  public void allowOverseerPendingTasksToComplete() {
-    shuttingDown.set(true);
-    while (pendingResponses.get() > 0) {
-      try {
-        Thread.sleep(50);
-      } catch (InterruptedException e) {
-        log.error("Interrupted while waiting for overseer queue to drain before shutdown!");
-      }
-    }
-  }
-
   /**
    * Returns true if the queue contains a task with the specified async id.
    */
@@ -133,21 +125,22 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
   /**
    * Watcher that blocks until a WatchedEvent occurs for a znode.
    */
-  static final class LatchWatcher implements Watcher {
+  static final class LatchWatcher implements Watcher, Closeable {
 
     private final Lock lock;
     private final Condition eventReceived;
-    private WatchedEvent event;
-    private Event.EventType latchEventType;
+    private final String path;
+    private final SolrZkClient zkClient;
+    private volatile WatchedEvent event;
+    private final Event.EventType latchEventType;
+    private volatile boolean closed;
 
-    LatchWatcher() {
-      this(null);
-    }
-
-    LatchWatcher(Event.EventType eventType) {
+    LatchWatcher(Event.EventType eventType, String path, SolrZkClient zkClient) {
       this.lock = new ReentrantLock();
       this.eventReceived = lock.newCondition();
       this.latchEventType = eventType;
+      this.path = path;
+      this.zkClient = zkClient;
     }
 
 
@@ -169,17 +162,38 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
         } finally {
           lock.unlock();
         }
+      } else {
+        try {
+          Stat stat = zkClient.exists(path, this, true);
+          if (stat != null && stat.getDataLength() > 0) {
+            this.event = new WatchedEvent(Event.EventType.NodeDataChanged, Event.KeeperState.SyncConnected, path);
+            lock.lock();
+            try {
+              eventReceived.signalAll();
+            } finally {
+              lock.unlock();
+            }
+          }
+        } catch (Exception e) {
+          log.error("", e);
+        }
       }
     }
 
     public void await(long timeoutMs) throws InterruptedException {
-      assert timeoutMs > 0;
+      TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
       lock.lock();
       try {
         if (this.event != null) {
           return;
         }
-        eventReceived.await(timeoutMs, TimeUnit.MILLISECONDS);
+        while (!timeout.hasTimedOut() && event == null && !closed) {
+          eventReceived.await(500, TimeUnit.MILLISECONDS);
+        }
+
+        if (timeout.hasTimedOut()) {
+          log.warn("Timeout waiting for response after {}ms", timeout.timeElapsed(TimeUnit.MILLISECONDS));
+        }
       } finally {
         lock.unlock();
       }
@@ -188,6 +202,22 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
     public WatchedEvent getWatchedEvent() {
       return event;
     }
+
+    @Override
+    public void close() throws IOException {
+      this.closed = true;
+      try {
+        zkClient.getSolrZooKeeper().removeWatches(path, this, Watcher.WatcherType.Any, true);
+      } catch (Exception e) {
+        log.info("could not remove watch {} {}", e.getClass().getSimpleName(), e.getMessage());
+      }
+      lock.lock();
+      try {
+        eventReceived.signalAll();
+      } finally {
+        lock.unlock();
+      }
+    }
   }
 
   /**
@@ -203,7 +233,7 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
       } catch (KeeperException.NodeExistsException e) {
         log.warn("Found request node already, waiting to see if it frees up ...");
         // TODO: use a watch?
-        Thread.sleep(250);
+        Thread.sleep(50);
         try {
           return zookeeper.create(path, data, mode, true);
         } catch (KeeperException.NodeExistsException ne) {
@@ -220,18 +250,21 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
   public QueueEvent offer(byte[] data, long timeout) throws KeeperException,
       InterruptedException {
     if (log.isDebugEnabled()) log.debug("offer operation to the Overseeer queue {}", Utils.fromJSON(data));
+    log.info("offer operation to the Overseeer queue {}", Utils.fromJSON(data));
     if (shuttingDown.get()) {
       throw new SolrException(SolrException.ErrorCode.CONFLICT,"Solr is shutting down, no more overseer tasks may be offered");
     }
    // Timer.Context time = stats.time(dir + "_offer");
+    LatchWatcher watcher = null;
     try {
       // Create and watch the response node before creating the request node;
       // otherwise we may miss the response.
       String watchID = createResponseNode();
 
       if (log.isDebugEnabled()) log.debug("watchId for response node {}, setting a watch ... ", watchID);
+      log.info("watchId for response node {}, setting a watch ... ", watchID);
 
-      LatchWatcher watcher = new LatchWatcher(Watcher.Event.EventType.NodeDataChanged);
+      watcher = new LatchWatcher(Watcher.Event.EventType.NodeDataChanged, watchID, zookeeper);
       Stat stat = zookeeper.exists(watchID, watcher, true);
 
       // create the request node
@@ -244,15 +277,18 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
       }
       byte[] bytes = zookeeper.getData(watchID, null, null, true);
       if (log.isDebugEnabled()) log.debug("get data from response node {} {} {}", watchID, bytes == null ? null : bytes.length, watcher.getWatchedEvent());
+
+      if (bytes == null) {
+        log.error("Found no data at response node {}", watchID);
+      }
       // create the event before deleting the node, otherwise we can get the deleted
       // event from the watcher.
       QueueEvent event =  new QueueEvent(watchID, bytes, watcher.getWatchedEvent());
-      if (log.isDebugEnabled()) log.debug("delete response node... {}", watchID);
-    //  zookeeper.delete(watchID, -1, true);
       return event;
     } finally {
      // time.stop();
       pendingResponses.decrementAndGet();
+      IOUtils.closeQuietly(watcher);
     }
   }
 
@@ -265,29 +301,7 @@ public class OverseerTaskQueue extends ZkDistributedQueue {
   String createResponseNode() throws KeeperException, InterruptedException {
     return createData(
         Overseer.OVERSEER_COLLECTION_MAP_COMPLETED + "/" + RESPONSE_PREFIX,
-        null, CreateMode.EPHEMERAL_SEQUENTIAL);
-  }
-
-
-  public List<QueueEvent> peekTopN(int n, Predicate<String> excludeSet, long waitMillis)
-      throws KeeperException, InterruptedException {
-    ArrayList<QueueEvent> topN = new ArrayList<>();
-
-    log.debug("Peeking for top {} elements. ExcludeSet: {}", n, excludeSet);
-    Timer.Context time;
-    if (waitMillis == Long.MAX_VALUE) time = stats.time(dir + "_peekTopN_wait_forever");
-    else time = stats.time(dir + "_peekTopN_wait" + waitMillis);
-
-    try {
-      for (Pair<String, byte[]> element : peekElements(n, waitMillis, child -> !excludeSet.test(dir + "/" + child))) {
-        topN.add(new QueueEvent(dir + "/" + element.first(),
-            element.second(), null));
-      }
-      printQueueEventsListElementIds(topN);
-      return topN;
-    } finally {
-      time.stop();
-    }
+        null, CreateMode.PERSISTENT_SEQUENTIAL);
   }
 
   private static void printQueueEventsListElementIds(ArrayList<QueueEvent> topN) {
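
The LatchWatcher changes above boil down to a watcher that signals a
Condition when its event arrives, awaiting in short slices so close() can
wake waiters. A stripped-down sketch against the real ZooKeeper Watcher
interface (the exists/re-check wiring is omitted):

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.locks.Condition;
    import java.util.concurrent.locks.ReentrantLock;

    import org.apache.zookeeper.WatchedEvent;
    import org.apache.zookeeper.Watcher;

    class LatchWatcherSketch implements Watcher {
      private final ReentrantLock lock = new ReentrantLock();
      private final Condition eventReceived = lock.newCondition();
      private volatile WatchedEvent event;
      private volatile boolean closed;

      @Override
      public void process(WatchedEvent e) {
        lock.lock();
        try {
          event = e;
          eventReceived.signalAll();
        } finally {
          lock.unlock();
        }
      }

      // waits in short slices so a close() can wake us promptly
      void await(long timeoutMs) throws InterruptedException {
        long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
        lock.lock();
        try {
          while (event == null && !closed && System.nanoTime() < deadline) {
            eventReceived.await(500, TimeUnit.MILLISECONDS);
          }
        } finally {
          lock.unlock();
        }
      }

      void close() {
        closed = true;
        lock.lock();
        try {
          eventReceived.signalAll();
        } finally {
          lock.unlock();
        }
      }
    }
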
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveringCoreTermWatcher.java b/solr/core/src/java/org/apache/solr/cloud/RecoveringCoreTermWatcher.java
index 2ce6251..7545282 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveringCoreTermWatcher.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveringCoreTermWatcher.java
@@ -17,9 +17,6 @@
 
 package org.apache.solr.cloud;
 
-import java.lang.invoke.MethodHandles;
-import java.util.concurrent.atomic.AtomicLong;
-
 import org.apache.solr.client.solrj.cloud.ShardTerms;
 import org.apache.solr.common.ParWork;
 import org.apache.solr.core.CoreContainer;
@@ -28,16 +25,21 @@ import org.apache.solr.core.SolrCore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.Closeable;
+import java.lang.invoke.MethodHandles;
+import java.util.concurrent.atomic.AtomicLong;
+
 /**
  * Start recovery of a core if its term is less than leader's term
  */
-public class RecoveringCoreTermWatcher implements ZkShardTerms.CoreTermWatcher {
+public class RecoveringCoreTermWatcher implements ZkShardTerms.CoreTermWatcher, Closeable {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private final CoreDescriptor coreDescriptor;
   private final CoreContainer coreContainer;
   // used to prevent the case when term of other replicas get changed, we redo recovery
   // the idea here is with a specific term of a replica, we only do recovery one
   private final AtomicLong lastTermDoRecovery;
+  private volatile boolean closed;
 
   RecoveringCoreTermWatcher(CoreDescriptor coreDescriptor, CoreContainer coreContainer) {
     this.coreDescriptor = coreDescriptor;
@@ -49,18 +51,18 @@ public class RecoveringCoreTermWatcher implements ZkShardTerms.CoreTermWatcher {
   public boolean onTermChanged(ShardTerms terms) {
     if (coreContainer.isShutDown()) return false;
 
-    try (SolrCore solrCore = coreContainer.getCore(coreDescriptor.getName())) {
-      if (solrCore == null || solrCore.isClosed()) {
+    try {
+      if (closed) {
         return false;
       }
-
-      if (solrCore.getCoreDescriptor() == null || solrCore.getCoreDescriptor().getCloudDescriptor() == null) return true;
-      String coreName = solrCore.getCoreDescriptor().getName();
+      String coreName = coreDescriptor.getName();
       if (terms.haveHighestTermValue(coreName)) return true;
       if (terms.getTerm(coreName) != null && lastTermDoRecovery.get() < terms.getTerm(coreName)) {
         log.info("Start recovery on {} because core's term is less than leader's term", coreName);
         lastTermDoRecovery.set(terms.getTerm(coreName));
-        solrCore.getUpdateHandler().getSolrCoreState().doRecovery(solrCore.getCoreContainer(), solrCore.getCoreDescriptor());
+        try (SolrCore solrCore = coreContainer.getCore(coreDescriptor.getName())) {
+          solrCore.getUpdateHandler().getSolrCoreState().doRecovery(solrCore.getCoreContainer(), solrCore.getCoreDescriptor());
+        }
       }
     } catch (Exception e) {
       ParWork.propagateInterrupt(e);
@@ -87,4 +89,9 @@ public class RecoveringCoreTermWatcher implements ZkShardTerms.CoreTermWatcher {
   public int hashCode() {
     return coreDescriptor.getName().hashCode();
   }
+
+  @Override
+  public void close() {
+    this.closed = true;
+  }
 }
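
The watcher above now checks cheap state first and only opens the SolrCore
when recovery is actually needed. A generic sketch of that check-cheap-first
shape, with hypothetical term arguments in place of ShardTerms:

    import java.util.concurrent.atomic.AtomicLong;

    class TermWatcherSketch {
      private final AtomicLong lastTermDoRecovery = new AtomicLong(-1);
      private volatile boolean closed;

      // returning false removes this watcher from the list
      boolean onTermChanged(long myTerm, long leaderTerm) {
        if (closed) return false;
        if (myTerm >= leaderTerm) return true; // cheap check, nothing to do
        if (lastTermDoRecovery.get() < leaderTerm) {
          lastTermDoRecovery.set(leaderTerm);
          startRecovery(); // only now acquire the expensive core reference
        }
        return true;
      }

      void startRecovery() { /* stands in for getting the core and calling doRecovery */ }

      void close() { closed = true; }
    }
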
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index ae4ac42..9337712 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -16,9 +16,7 @@
  */
 package org.apache.solr.cloud;
 
-import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.store.Directory;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
@@ -39,7 +37,6 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.CoreDescriptor;
-import org.apache.solr.core.DirectoryFactory.DirContext;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.IndexFetcher;
 import org.apache.solr.handler.ReplicationHandler;
@@ -63,7 +60,6 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;
@@ -72,23 +68,18 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.locks.ReentrantLock;
 
 /**
  * This class may change in future and customisations are not supported between versions in terms of API or back compat
  * behaviour.
- * 
+ *
  * @lucene.experimental
  */
 public class RecoveryStrategy implements Runnable, Closeable {
 
   private volatile CountDownLatch latch;
   private volatile ReplicationHandler replicationHandler;
-  private volatile ReentrantLock recoveryLock;
-
-  public final void setRecoveryLock(ReentrantLock recoveryLock) {
-    this.recoveryLock = recoveryLock;
-  }
+  private volatile Http2SolrClient recoveryOnlyClient;
 
   public static class Builder implements NamedListInitializedPlugin {
     private NamedList args;
@@ -118,7 +109,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
       .getInteger("solr.cloud.wait-for-updates-with-stale-state-pause", 0);
   private volatile int maxRetries = 500;
   private volatile int startingRecoveryDelayMilliSeconds = Integer
-          .getInteger("solr.cloud.starting-recovery-delay-milli-seconds", 0);
+      .getInteger("solr.cloud.starting-recovery-delay-milli-seconds", 0);
 
   public static interface RecoveryListener {
     public void recovered();
@@ -127,16 +118,16 @@ public class RecoveryStrategy implements Runnable, Closeable {
   }
 
   private volatile boolean close = false;
-  private volatile RecoveryListener recoveryListener;
+  private final RecoveryListener recoveryListener;
   private final ZkController zkController;
   private final String baseUrl;
   private final ZkStateReader zkStateReader;
-  private volatile String coreName;
+  private final String coreName;
   private final AtomicInteger retries = new AtomicInteger(0);
   private boolean recoveringAfterStartup;
   private volatile Cancellable prevSendPreRecoveryHttpUriRequest;
-  private volatile Replica.Type replicaType;
-  private volatile CoreDescriptor coreDescriptor;
+  private final Replica.Type replicaType;
+  private final CoreDescriptor coreDescriptor;
 
   private final CoreContainer cc;
 
@@ -150,6 +141,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
     zkStateReader = zkController.getZkStateReader();
     baseUrl = zkController.getBaseUrl();
     replicaType = cd.getCloudDescriptor().getReplicaType();
+    this.coreDescriptor = cd;
   }
 
   final public int getWaitForUpdatesWithStaleStatePauseMilliSeconds() {
@@ -184,7 +176,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
   final public void setRecoveringAfterStartup(boolean recoveringAfterStartup) {
     this.recoveringAfterStartup = recoveringAfterStartup;
   }
-  
+
   // make sure any threads stop retrying
   @Override
   final public void close() {
@@ -193,7 +185,9 @@ public class RecoveryStrategy implements Runnable, Closeable {
     try (ParWork closer = new ParWork(this, true, true)) {
       closer.collect("prevSendPreRecoveryHttpUriRequestAbort", () -> {
         try {
-          prevSendPreRecoveryHttpUriRequest.cancel();
+          if (prevSendPreRecoveryHttpUriRequest != null) {
+            prevSendPreRecoveryHttpUriRequest.cancel();
+          }
           prevSendPreRecoveryHttpUriRequest = null;
         } catch (NullPointerException e) {
           // expected
@@ -204,15 +198,18 @@ public class RecoveryStrategy implements Runnable, Closeable {
         ReplicationHandler finalReplicationHandler = replicationHandler;
         closer.collect("abortFetch", () -> {
           if (finalReplicationHandler != null) finalReplicationHandler.abortFetch();
+          replicationHandler = null;
+        });
+      }
+      if (latch != null) {
+        closer.collect("latch", () -> {
+          try {
+            latch.countDown();
+          } catch (NullPointerException e) {
+            // expected
+          }
         });
       }
-      closer.collect("latch", () -> {
-        try {
-          latch.countDown();
-        } catch (NullPointerException e) {
-          // expected
-        }
-      });
 
     }
 
@@ -220,11 +217,10 @@ public class RecoveryStrategy implements Runnable, Closeable {
     //ObjectReleaseTracker.release(this);
   }
 
-  final private void recoveryFailed(final SolrCore core,
-      final ZkController zkController, final String baseUrl, final CoreDescriptor cd) throws Exception {
+  final private void recoveryFailed(final ZkController zkController, final String baseUrl, final CoreDescriptor cd) throws Exception {
     SolrException.log(log, "Recovery failed - I give up.");
     try {
-      if (zkController.getZkClient().isConnected()) {
+      if (zkController.getZkClient().isAlive()) {
         zkController.publish(cd, Replica.State.RECOVERY_FAILED);
       }
     } finally {
@@ -243,7 +239,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
     return leaderprops.getCoreUrl();
   }
 
-  final private void replicate(String nodeName, SolrCore core, Replica leaderprops)
+  final private void replicate(Replica leaderprops)
       throws SolrServerException, IOException {
 
     final String leaderUrl = getReplicateLeaderUrl(leaderprops, zkStateReader);
@@ -251,13 +247,10 @@ public class RecoveryStrategy implements Runnable, Closeable {
     log.info("Attempting to replicate from [{}].", leaderprops);
 
     // send commit
-    commitOnLeader(core, leaderUrl);
-
-    // use rep handler directly, so we can do this sync rather than async
-    SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
-    ReplicationHandler replicationHandler = (ReplicationHandler) handler;
+    commitOnLeader(leaderUrl);
 
     if (replicationHandler == null) {
+      log.error("Could not find replication handler for recovery");
       throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
           "Skipping recovery, no " + ReplicationHandler.PATH + " handler found");
     }
@@ -276,7 +269,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
 
     if (result.getMessage().equals(IndexFetcher.IndexFetchResult.FAILED_BY_INTERRUPT_MESSAGE)) {
       log.info("Interrupted, stopping recovery");
-      return;
+
     }
 
     if (result.getSuccessful()) {
@@ -284,103 +277,102 @@ public class RecoveryStrategy implements Runnable, Closeable {
       success = true;
     } else {
       log.error("replication fetch reported as failed: {} {} {}", result.getMessage(), result, result.getException());
-    }
-
-    if (!success) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for recovery failed.");
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Replication fetch reported as failed");
     }
 
     // solrcloud_debug
-    if (log.isDebugEnabled()) {
-      try {
-        RefCounted<SolrIndexSearcher> searchHolder = core
-            .getNewestSearcher(false);
-        SolrIndexSearcher searcher = searchHolder.get();
-        Directory dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.META_DATA, null);
-        try {
-          final IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
-          if (log.isDebugEnabled()) {
-            log.debug("{} replicated {} from {} gen: {} data: {} index: {} newIndex: {} files: {}"
-                , core.getCoreContainer().getZkController().getNodeName()
-                , searcher.count(new MatchAllDocsQuery())
-                , leaderUrl
-                , (null == commit ? "null" : commit.getGeneration())
-                , core.getDataDir()
-                , core.getIndexDir()
-                , core.getNewIndexDir()
-                , Arrays.asList(dir.listAll()));
-          }
-        } finally {
-          core.getDirectoryFactory().release(dir);
-          searchHolder.decref();
-        }
-      } catch (Exception e) {
-        ParWork.propagateInterrupt(e);
-        log.debug("Error in solrcloud_debug block", e);
-      }
-    }
+//    if (log.isDebugEnabled()) {
+//      try {
+//        RefCounted<SolrIndexSearcher> searchHolder = core
+//            .getNewestSearcher(false);
+//        SolrIndexSearcher searcher = searchHolder.get();
+//        Directory dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.META_DATA, null);
+//        try {
+//          final IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
+//          if (log.isDebugEnabled()) {
+//            log.debug("{} replicated {} from {} gen: {} data: {} index: {} newIndex: {} files: {}"
+//                , core.getCoreContainer().getZkController().getNodeName()
+//                , searcher.count(new MatchAllDocsQuery())
+//                , leaderUrl
+//                , (null == commit ? "null" : commit.getGeneration())
+//                , core.getDataDir()
+//                , core.getIndexDir()
+//                , core.getNewIndexDir()
+//                , Arrays.asList(dir.listAll()));
+//          }
+//        } finally {
+//          core.getDirectoryFactory().release(dir);
+//          searchHolder.decref();
+//        }
+//      } catch (Exception e) {
+//        ParWork.propagateInterrupt(e);
+//        log.debug("Error in solrcloud_debug block", e);
+//      }
+//    }
 
   }
 
-  final private void commitOnLeader(SolrCore core, String leaderUrl) throws SolrServerException,
+  final private void commitOnLeader(String leaderUrl) throws SolrServerException,
       IOException {
-    log.info("send commit to leader {}", leaderUrl);
-    Http2SolrClient client = core.getCoreContainer().getUpdateShardHandler().getRecoveryOnlyClient();
+
     UpdateRequest ureq = new UpdateRequest();
     ureq.setBasePath(leaderUrl);
     ureq.setParams(new ModifiableSolrParams());
     ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, "terminal");
+    //ureq.getParams().set("dist", false);
     // ureq.getParams().set(UpdateParams.OPEN_SEARCHER, onlyLeaderIndexes);// Why do we need to open searcher if
     // "onlyLeaderIndexes"?
     ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
-    ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(client);
+
+    log.info("send commit to leader {} {}", leaderUrl, ureq.getParams());
+    ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, false).process(recoveryOnlyClient);
   }
 
   @Override
   final public void run() {
-
     // set request info for logging
-
     log.info("Starting recovery process. recoveringAfterStartup={}", recoveringAfterStartup);
-
-    try (SolrCore core = cc.getCore(coreName)) {
-      if (core == null) {
-        close = true;
-        return;
-      }
-
-      SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
-      replicationHandler = (ReplicationHandler) handler;
-
-      doRecovery(core);
+    try {
+      doRecovery();
     } catch (InterruptedException e) {
-      ParWork.propagateInterrupt(e, true);
+      log.info("InterruptedException, won't do recovery", e);
       return;
     } catch (AlreadyClosedException e) {
+      log.info("AlreadyClosedException, won't do recovery");
       return;
     } catch (Exception e) {
       ParWork.propagateInterrupt(e);
-      log.error("", e);
+      log.error("Exception during recovery", e);
       return;
-    } finally {
-      try {
-        recoveryLock.unlock();
-      } catch (NullPointerException e) {}
     }
-
   }
 
-  final public void doRecovery(SolrCore core) throws Exception {
+  final public void doRecovery() throws Exception {
     // we can lose our core descriptor, so store it now
-    this.coreDescriptor = core.getCoreDescriptor();
+//    try {
+//      Replica leader = zkController.getZkStateReader().getLeaderRetry(coreDescriptor.getCollectionName(), coreDescriptor.getCloudDescriptor().getShardId(), 15000);
+//      if (leader != null && leader.getName().equals(coreName)) {
+//        log.info("We are the leader, STOP recovery");
+//        return;
+//      }
+//    } catch (InterruptedException e) {
+//      log.info("InterruptedException, won't do recovery", e);
+//      throw new SolrException(ErrorCode.BAD_REQUEST, e);
+//    } catch (TimeoutException e) {
+//      log.info("Timeout waiting for leader, won't do recovery", e);
+//   //   throw new SolrException(ErrorCode.SERVER_ERROR, e);
+//    }
+
     if (this.coreDescriptor.getCloudDescriptor().requiresTransactionLog()) {
-      doSyncOrReplicateRecovery(core);
+      log.info("Sync or replica recovery");
+      doSyncOrReplicateRecovery();
     } else {
-      doReplicateOnlyRecovery(core);
+      log.info("Replicate only recovery");
+      doReplicateOnlyRecovery();
     }
   }
 
-  final private void doReplicateOnlyRecovery(SolrCore core) throws InterruptedException {
+  final private void doReplicateOnlyRecovery() throws Exception {
     boolean successfulRecovery = false;
 
     // if (core.getUpdateHandler().getUpdateLog() != null) {
@@ -389,57 +381,36 @@ public class RecoveryStrategy implements Runnable, Closeable {
     // + core.getUpdateHandler().getUpdateLog());
     // return;
     // }
-    while (!successfulRecovery && !isClosed()) { // don't use interruption or
-                                                                                            // it will close channels
-                                                                                            // though
-      try {
-        CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
-        Replica leaderprops = zkStateReader.getLeaderRetry(
-            cloudDesc.getCollectionName(), cloudDesc.getShardId());
 
-        String leaderUrl = leaderprops.getCoreUrl();
+    log.info("Publishing state of core [{}] as recovering", coreName);
 
-        String ourUrl = Replica.getCoreUrl(baseUrl, coreName);
+    zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
 
-        boolean isLeader = leaderUrl.equals(ourUrl); // TODO: We can probably delete most of this code if we say this
-                                                     // strategy can only be used for pull replicas
-        if (isLeader && !cloudDesc.isLeader()) {
-          throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
-        }
-        if (cloudDesc.isLeader()) {
-          assert cloudDesc.getReplicaType() != Replica.Type.PULL;
-          // we are now the leader - no one else must have been suitable
-          log.warn("We have not yet recovered - but we are now the leader!");
-          log.info("Finished recovery process.");
-          zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
-          return;
+    while (!successfulRecovery && !isClosed()) { // don't use interruption or
+      // it will close channels
+      // though
+      try {
+        try (SolrCore core = cc.getCore(coreName)) {
+          if (core == null) {
+            log.warn("SolrCore is null, won't do recovery");
+            throw new AlreadyClosedException();
+          }
+          recoveryOnlyClient = core.getCoreContainer().getUpdateShardHandler().getRecoveryOnlyClient();
+          SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
+          replicationHandler = (ReplicationHandler) handler;
         }
 
-        if (log.isInfoEnabled()) {
-          log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
-              ourUrl);
-        }
-        zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
+        CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
+        Replica leaderprops = zkStateReader.getLeaderRetry(
+            cloudDesc.getCollectionName(), cloudDesc.getShardId(), 15000);
 
-        if (isClosed()) {
-          if (log.isInfoEnabled()) {
-            log.info("Recovery for core {} has been closed", core.getName());
-          }
-          break;
-        }
-        log.info("Starting Replication Recovery.");
+        log.info("Starting Replication Recovery. [{}] leader is [{}] and I am [{}]", coreName, leaderprops.getName(), Replica.getCoreUrl(baseUrl, coreName));
+        log.info("");
 
         try {
           log.info("Stopping background replicate from leader process");
           zkController.stopReplicationFromLeader(coreName);
-          replicate(zkController.getNodeName(), core, leaderprops);
-
-          if (isClosed()) {
-            if (log.isInfoEnabled()) {
-              log.info("Recovery for core {} has been closed", core.getName());
-            }
-            break;
-          }
+          replicate(leaderprops);
 
           log.info("Replication Recovery was successful.");
           successfulRecovery = true;
@@ -466,7 +437,6 @@ public class RecoveryStrategy implements Runnable, Closeable {
           }
 
           if (successfulRecovery) {
-            close = true;
             recoveryListener.recovered();
           }
         }
@@ -478,20 +448,13 @@ public class RecoveryStrategy implements Runnable, Closeable {
         // Or do a fall off retry...
         try {
 
-          if (isClosed()) {
-            if (log.isInfoEnabled()) {
-              log.info("Recovery for core {} has been closed", core.getName());
-            }
-            break;
-          }
-
           log.error("Recovery failed - trying again... ({})", retries);
 
 
           if (retries.incrementAndGet() >= maxRetries) {
             SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
             try {
-              recoveryFailed(core, zkController, baseUrl, this.coreDescriptor);
+              recoveryFailed(zkController, baseUrl, this.coreDescriptor);
             } catch (InterruptedException e) {
               ParWork.propagateInterrupt(e);
               return;
@@ -505,27 +468,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
           ParWork.propagateInterrupt(e);
           SolrException.log(log, "An error has occurred during recovery", e);
         }
-
-        try {
-          // Wait an exponential interval between retries, start at 5 seconds and work up to a minute.
-          // If we're at attempt >= 4, there's no point computing pow(2, retries) because the result
-          // will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in
-          // order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m).
-          int loopCount =  retries.get() < 4 ? (int) Math.min(Math.pow(2, retries.get()), 12) : 12;
-          log.info("Wait [{}] seconds before trying to recover again (attempt={})",
-              TimeUnit.MILLISECONDS.toSeconds(loopCount * startingRecoveryDelayMilliSeconds), retries);
-          for (int i = 0; i < loopCount; i++) {
-            if (isClosed()) {
-              if (log.isInfoEnabled()) {
-                log.info("Recovery for core {} has been closed", core.getName());
-              }
-              break; // check if someone closed us
-            }
-            Thread.sleep(startingRecoveryDelayMilliSeconds);
-          }
-        } catch (InterruptedException e) {
-          log.warn("Recovery was interrupted.", e);
-          close = true;
+        if (!successfulRecovery) {
+          waitForRetry();
         }
       }
 
@@ -535,17 +479,30 @@ public class RecoveryStrategy implements Runnable, Closeable {
   }
 
   // TODO: perhaps make this grab a new core each time through the loop to handle core reloads?
-  public final void doSyncOrReplicateRecovery(SolrCore core) throws Exception {
-    log.info("Do peersync or replication recovery core={} collection={}", core.getName(), core.getCoreDescriptor().getCollectionName());
-    boolean successfulRecovery = false;
+  public final void doSyncOrReplicateRecovery() throws Exception {
+    log.info("Do peersync or replication recovery core={} collection={}", coreName, coreDescriptor.getCollectionName());
 
+    log.info("Publishing state of core [{}] as recovering", coreName);
+
+    zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
+
+    boolean successfulRecovery = false;
+    boolean publishedActive = false;
     UpdateLog ulog;
-    ulog = core.getUpdateHandler().getUpdateLog();
-    if (ulog == null) {
-      SolrException.log(log, "No UpdateLog found - cannot recover.");
-      recoveryFailed(core, zkController, baseUrl,
-          this.coreDescriptor);
-      return;
+    try (SolrCore core = cc.getCore(coreName)) {
+      if (core == null) {
+        log.warn("SolrCore is null, won't do recovery");
+        throw new AlreadyClosedException();
+      }
+      recoveryOnlyClient = core.getCoreContainer().getUpdateShardHandler().getRecoveryOnlyClient();
+      SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
+      replicationHandler = (ReplicationHandler) handler;
+      ulog = core.getUpdateHandler().getUpdateLog();
+      if (ulog == null) {
+        SolrException.log(log, "No UpdateLog found - cannot recover.");
+        recoveryFailed(zkController, baseUrl, this.coreDescriptor);
+        return;
+      }
     }
 
     // we temporarily ignore peersync for tlog replicas
@@ -588,9 +545,6 @@ public class RecoveryStrategy implements Runnable, Closeable {
           }
         }
       } catch (Exception e) {
-        if (e instanceof  InterruptedException) {
-          return;
-        }
         ParWork.propagateInterrupt(e);
         SolrException.log(log, "Error getting recent versions.", e);
         recentVersions = Collections.emptyList();
@@ -621,42 +575,26 @@ public class RecoveryStrategy implements Runnable, Closeable {
       zkController.stopReplicationFromLeader(coreName);
     }
 
-    final String ourUrl = Replica.getCoreUrl(baseUrl, coreName);
     Future<RecoveryInfo> replayFuture = null;
-    while (!successfulRecovery && !isClosed()) { // don't use interruption or
-                                                                                            // it will close channels
-                                                                                            // though
+
+    while (!successfulRecovery && !isClosed()) {
       try {
         CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
-        final Replica leader = zkStateReader.getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId(), 3000);
+        final Replica leader = zkStateReader.getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId(), 15000);
 
         log.info("Begin buffering updates. core=[{}]", coreName);
         // recalling buffer updates will drop the old buffer tlog
         ulog.bufferUpdates();
 
-        if (log.isInfoEnabled()) {
-          log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(),
-              leader.getCoreUrl(),
-              ourUrl);
-        }
-        zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
-
-        final Slice slice = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
-            .getSlice(cloudDesc.getShardId());
-
         try {
-          prevSendPreRecoveryHttpUriRequest.cancel();
+          if (prevSendPreRecoveryHttpUriRequest != null) {
+            prevSendPreRecoveryHttpUriRequest.cancel();
+          }
         } catch (NullPointerException e) {
           // okay
         }
 
-        if (isClosed()) {
-          log.info("RecoveryStrategy has been closed");
-          break;
-        }
-
-        sendPrepRecoveryCmd(core, leader.getCoreUrl(), leader.getName(), slice);
-
+        // sendPrepRecoveryCmd(leader.getBaseUrl(), leader.getName(), slice);
 
         // we wait a bit so that any updates on the leader
         // that started before they saw recovering state
@@ -674,59 +612,63 @@ public class RecoveryStrategy implements Runnable, Closeable {
         if (firstTime) {
           firstTime = false; // only try sync the first time through the loop
           if (log.isInfoEnabled()) {
-            log.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leader.getCoreUrl(),
-                recoveringAfterStartup);
+            log.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leader.getCoreUrl(), recoveringAfterStartup);
           }
-          // System.out.println("Attempting to PeerSync from " + leaderUrl
-          // + " i am:" + zkController.getNodeName());
-          boolean syncSuccess;
-          try (PeerSyncWithLeader peerSyncWithLeader = new PeerSyncWithLeader(core,
-              leader.getCoreUrl(), ulog.getNumRecordsToKeep())) {
-            syncSuccess = peerSyncWithLeader.sync(recentVersions).isSuccess();
+          try (SolrCore core = cc.getCore(coreName)) {
+            if (core == null) {
+              log.warn("SolrCore is null, won't do recovery");
+              throw new AlreadyClosedException();
+            }
+
+            // System.out.println("Attempting to PeerSync from " + leaderUrl
+            // + " i am:" + zkController.getNodeName());
+            boolean syncSuccess;
+            try (PeerSyncWithLeader peerSyncWithLeader = new PeerSyncWithLeader(core, leader.getCoreUrl(), ulog.getNumRecordsToKeep())) {
+              syncSuccess = peerSyncWithLeader.sync(recentVersions).isSuccess();
+            }
+            if (syncSuccess) {
+              SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+              log.info("PeerSync was successful, commit to force open a new searcher");
+              // force open a new searcher
+              core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
+              req.close();
+              log.info("PeerSync stage of recovery was successful.");
+
+              // solrcloud_debug
+              // cloudDebugLog(core, "synced");
+
+              log.info("Replaying updates buffered during PeerSync.");
+              replay();
+
+              // sync success
+              successfulRecovery = true;
+            }
           }
-          if (syncSuccess) {
-            SolrQueryRequest req = new LocalSolrQueryRequest(core,
-                new ModifiableSolrParams());
-            log.info("PeerSync was successful, commit to force open a new searcher");
-            // force open a new searcher
-            core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
-            req.close();
-            log.info("PeerSync stage of recovery was successful.");
-
-            // solrcloud_debug
-            cloudDebugLog(core, "synced");
-
-            log.info("Replaying updates buffered during PeerSync.");
-            replay(core);
-
-            // sync success
-            successfulRecovery = true;
-            break;
+          if (!successfulRecovery) {
+            log.info("PeerSync Recovery was not successful - trying replication.");
           }
-
-          log.info("PeerSync Recovery was not successful - trying replication.");
         }
+        if (!successfulRecovery) {
+          log.info("Starting Replication Recovery.");
 
+          try {
 
-        log.info("Starting Replication Recovery.");
-
-        try {
-
-          replicate(zkController.getNodeName(), core, leader);
+            replicate(leader);
 
-          replay(core);
+            replay();
 
-          log.info("Replication Recovery was successful.");
-          successfulRecovery = true;
-        } catch (InterruptedException | AlreadyClosedException e) {
-          log.info("Interrupted or already closed, bailing on recovery");
-          return;
-        } catch (Exception e) {
-          SolrException.log(log, "Error while trying to recover", e);
+            log.info("Replication Recovery was successful.");
+            successfulRecovery = true;
+          } catch (InterruptedException | AlreadyClosedException e) {
+            log.info("Interrupted or already closed, bailing on recovery");
+            throw new AlreadyClosedException();
+          } catch (Exception e) {
+            log.error("Error while trying to recover", e);
+          }
         }
-
       } catch (Exception e) {
-        SolrException.log(log, "Error while trying to recover. core=" + coreName, e);
+        log.error("Error while trying to recover. core=" + coreName, e);
+        successfulRecovery = false;
       } finally {
         if (successfulRecovery) {
           log.info("Registering as Active after recovery {}", coreName);
@@ -734,42 +676,53 @@ public class RecoveryStrategy implements Runnable, Closeable {
             if (replicaType == Replica.Type.TLOG) {
               zkController.startReplicationFromLeader(coreName, true);
             }
+            publishedActive = true;
             zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
-          } catch (InterruptedException e) {
-            ParWork.propagateInterrupt(e);
-            close = true;
+          } catch (AlreadyClosedException e) {
+
           } catch (Exception e) {
             log.error("Could not publish as ACTIVE after succesful recovery", e);
-            successfulRecovery = false;
+           // core.getSolrCoreState().doRecovery(core);
           }
 
-          if (successfulRecovery) {
-            close = true;
-            recoveryListener.recovered();
-          }
+
         } else {
           log.info("Recovery was not sucessful, will not register as ACTIVE {}", coreName);
         }
+
+        if (successfulRecovery) {
+          recoveryListener.recovered();
+        }
+
+        // if replay was skipped (possibly due to pulling a full index from the leader),
+        // then we still need to update version bucket seeds after recovery
+        if (successfulRecovery && replayFuture == null) {
+          log.info("Updating version bucket highest from index after successful recovery.");
+          try (SolrCore core = cc.getCore(coreName)) {
+            if (core == null) {
+              log.warn("SolrCore is null, won't do recovery");
+              throw new AlreadyClosedException();
+            }
+            core.seedVersionBuckets();
+          }
+        }
       }
 
-      if (!successfulRecovery) {
+      if (!successfulRecovery && !isClosed()) {
         // lets pause for a moment and we need to try again...
         // TODO: we don't want to retry for some problems?
         // Or do a fall off retry...
         try {
-
-
           log.error("Recovery failed - trying again... ({})", retries);
 
           if (retries.incrementAndGet() >= maxRetries) {
             SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
             try {
-              recoveryFailed(core, zkController, baseUrl, this.coreDescriptor);
-            } catch(InterruptedException e) {
+              recoveryFailed(zkController, baseUrl, this.coreDescriptor);
+            } catch (InterruptedException e) {
               ParWork.propagateInterrupt(e);
               return;
-            }  catch
-            (Exception e) {
+            } catch (Exception e) {
               SolrException.log(log, "Could not publish that recovery failed", e);
             }
             break;
@@ -777,99 +730,101 @@ public class RecoveryStrategy implements Runnable, Closeable {
         } catch (Exception e) {
           SolrException.log(log, "An error has occurred during recovery", e);
         }
+      }
 
-        try {
-          // Wait an exponential interval between retries, start at 2 seconds and work up to a minute.
-          // Since we sleep at 2 seconds sub-intervals in
-          // order to check if we were closed, 30 is chosen as the maximum loopCount (2s * 30 = 1m).
+      if (!successfulRecovery) {
+        waitForRetry();
+      }
+    }
 
-          if (isClosed()) {
-            log.info("RecoveryStrategy has been closed");
-            return;
-          }
+    log.info("Finished doSyncOrReplicateRecovery process, successful=[{}]", successfulRecovery);
+    if (successfulRecovery && !publishedActive) {
+      log.error("Illegal state, successful recovery, but did not publish active");
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Illegal state, successful recovery, but did not publish active");
+    }
+  }
 
-          long wait = startingRecoveryDelayMilliSeconds;
+  private final void waitForRetry() {
+    try {
 
-          if (retries.get() > 1 && retries.get() < 10) {
-            wait = (Math.max(500, startingRecoveryDelayMilliSeconds)) * retries.get();
-          } else if (retries.get() > 0) {
-            wait = TimeUnit.SECONDS.toMillis(60);
-          }
+      long wait = startingRecoveryDelayMilliSeconds;
 
-          log.info("Wait [{}] ms before trying to recover again (attempt={})", wait, retries);
+      if (retries.get() >= 0 && retries.get() < 10) {
+        wait = 0;
+      } else if (retries.get() >= 10 && retries.get() < 20) {
+        wait = 1500;
+      } else if (retries.get() > 0) {
+        wait = TimeUnit.SECONDS.toMillis(60);
+      }
 
-          if (wait > 1000) {
-            TimeOut timeout = new TimeOut(wait, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
-            while (!timeout.hasTimedOut()) {
-              if (isClosed()) {
-                log.info("RecoveryStrategy has been closed");
-                return;
-              }
-              Thread.sleep(1000);
-            }
-          } else {
-            Thread.sleep(wait);
-          }
+      log.info("Wait [{}] ms before trying to recover again (attempt={})", wait, retries);
 
-        } catch (InterruptedException e) {
-          ParWork.propagateInterrupt(e, true);
-          return;
+      if (wait > 1000) {
+        TimeOut timeout = new TimeOut(wait, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
+        while (!timeout.hasTimedOut()) {
+          if (isClosed()) {
+            log.info("RecoveryStrategy has been closed");
+            throw new AlreadyClosedException();
+          }
+          Thread.sleep(1000);
         }
+      } else {
+        Thread.sleep(wait);
       }
 
+    } catch (InterruptedException e) {
+      ParWork.propagateInterrupt(e, true);
+      throw new AlreadyClosedException();
     }
 
-    // if replay was skipped (possibly to due pulling a full index from the leader),
-    // then we still need to update version bucket seeds after recovery
-    if (successfulRecovery && replayFuture == null) {
-      log.info("Updating version bucket highest from index after successful recovery.");
-      core.seedVersionBuckets();
-    }
-
-    log.info("Finished recovery process, successful=[{}]", successfulRecovery);
   }
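
    A minimal standalone sketch of the tiered wait schedule that waitForRetry()
    above now uses in place of the old exponential backoff (class and method
    names here are illustrative; thresholds are taken from the code above, and
    the final branch is effectively an unconditional else since retries is
    always non-negative):

        import java.util.concurrent.TimeUnit;

        final class RetryBackoff {
          // Mirrors waitForRetry(): retry immediately for the first ten attempts,
          // pause briefly for the next ten, then back off hard to one minute.
          static long computeWaitMs(int retries) {
            if (retries < 10) {
              return 0;                            // retry immediately
            } else if (retries < 20) {
              return 1500;                         // brief pause
            }
            return TimeUnit.SECONDS.toMillis(60);  // long back-off
          }
        }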
 
   public static Runnable testing_beforeReplayBufferingUpdates;
 
-    final private void replay(SolrCore core)
+  final private void replay()
       throws InterruptedException, ExecutionException {
     if (testing_beforeReplayBufferingUpdates != null) {
       testing_beforeReplayBufferingUpdates.run();
     }
-    if (replicaType == Replica.Type.TLOG) {
-      // roll over all updates during buffering to new tlog, make RTG available
-      SolrQueryRequest req = new LocalSolrQueryRequest(core,
-          new ModifiableSolrParams());
-      core.getUpdateHandler().getUpdateLog().copyOverBufferingUpdates(new CommitUpdateCommand(req, false));
-      req.close();
-    }
-    Future<RecoveryInfo> future = core.getUpdateHandler().getUpdateLog().applyBufferedUpdates();
-    if (future == null) {
-      // no replay needed\
-      log.info("No replay needed.");
-    } else {
-      log.info("Replaying buffered documents.");
-      // wait for replay
-      RecoveryInfo report;
-      try {
-        report = future.get(10, TimeUnit.MINUTES); // nocommit - how long? make configurable too
-      } catch (InterruptedException e) {
-        ParWork.propagateInterrupt(e);
-        throw new InterruptedException();
-      } catch (TimeoutException e) {
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+    try (SolrCore core = cc.getCore(coreName)) {
+      if (core == null) {
+        log.warn("SolrCore is null, won't do recovery");
+        throw new AlreadyClosedException();
       }
-      if (report.failed) {
-        SolrException.log(log, "Replay failed");
-        throw new SolrException(ErrorCode.SERVER_ERROR, "Replay failed");
+      if (replicaType == Replica.Type.TLOG) {
+        // roll over all updates during buffering to new tlog, make RTG available
+        SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+        core.getUpdateHandler().getUpdateLog().copyOverBufferingUpdates(new CommitUpdateCommand(req, false));
+        req.close();
+      }
+      Future<RecoveryInfo> future = core.getUpdateHandler().getUpdateLog().applyBufferedUpdates();
+      if (future == null) {
+        // no replay needed
+        log.info("No replay needed.");
+        return;
+      } else {
+        log.info("Replaying buffered documents.");
+        // wait for replay
+        RecoveryInfo report;
+        try {
+          report = future.get(10, TimeUnit.MINUTES); // nocommit - how long? make configurable too
+        } catch (InterruptedException e) {
+          ParWork.propagateInterrupt(e);
+          throw new InterruptedException();
+        } catch (TimeoutException e) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, e);
+        }
+        if (report.failed) {
+          SolrException.log(log, "Replay failed");
+          throw new SolrException(ErrorCode.SERVER_ERROR, "Replay failed");
+        }
       }
-    }
-
-    // the index may ahead of the tlog's caches after recovery, by calling this tlog's caches will be purged
-    core.getUpdateHandler().getUpdateLog().openRealtimeSearcher();
 
+      // the index may be ahead of the tlog's caches after recovery; calling this purges the tlog's caches
+      core.getUpdateHandler().getUpdateLog().openRealtimeSearcher();
+    }
     // solrcloud_debug
-    cloudDebugLog(core, "replayed");
+    // cloudDebugLog(core, "replayed");
   }
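
    The recovery steps above now borrow the core per step instead of holding a
    SolrCore reference across the whole loop. Sketched in isolation, using the
    same names as this class, the checkout pattern is:

        try (SolrCore core = cc.getCore(coreName)) { // ref-counted checkout
          if (core == null) {
            // container shutting down or core unloaded: treat as closed
            throw new AlreadyClosedException();
          }
          // ... perform exactly one recovery step with the core ...
        } // close() releases this reference; the core itself stays open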
 
   final private void cloudDebugLog(SolrCore core, String op) {
@@ -893,10 +848,10 @@ public class RecoveryStrategy implements Runnable, Closeable {
   }
 
   final public boolean isClosed() {
-    return close;
+    return close || cc.isShutDown();
   }
 
-  final private void sendPrepRecoveryCmd(SolrCore core, String leaderBaseUrl, String leaderCoreName, Slice slice)
+  final private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice)
       throws SolrServerException, IOException {
 
     if (coreDescriptor.getCollectionName() == null) {
@@ -916,27 +871,32 @@ public class RecoveryStrategy implements Runnable, Closeable {
     log.info("Sending prep recovery command to {} for core {} params={}", leaderBaseUrl, leaderCoreName, prepCmd.getParams());
 
     int conflictWaitMs = zkController.getLeaderConflictResolveWait();
-    int readTimeout = conflictWaitMs + Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "30000"));
+    int readTimeout = conflictWaitMs + Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "15000"));
     // nocommit
-    try (Http2SolrClient client = new Http2SolrClient.Builder(leaderBaseUrl).withHttpClient(core.getCoreContainer().getUpdateShardHandler().
+    try (Http2SolrClient client = new Http2SolrClient.Builder(leaderBaseUrl).withHttpClient(cc.getUpdateShardHandler().
         getRecoveryOnlyClient()).idleTimeout(readTimeout).markInternalRequest().build()) {
+
       prepCmd.setBasePath(leaderBaseUrl);
       log.info("Sending prep recovery command to [{}]; [{}]", leaderBaseUrl, prepCmd);
       latch = new CountDownLatch(1);
       Cancellable result = client.asyncRequest(prepCmd, null, new NamedListAsyncListener(latch));
-      prevSendPreRecoveryHttpUriRequest = result;
       try {
-        boolean success = latch.await(5, TimeUnit.SECONDS);
-        if (!success) {
-          throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Timeout waiting for prep recovery cmd on leader");
+        prevSendPreRecoveryHttpUriRequest = result;
+        try {
+          boolean success = latch.await(15, TimeUnit.SECONDS);
+          if (!success) {
+            result.cancel();
+            throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Timeout waiting for prep recovery cmd on leader");
+          }
+        } catch (InterruptedException e) {
+          ParWork.propagateInterrupt(e);
+        } finally {
+          prevSendPreRecoveryHttpUriRequest = null;
+          latch = null;
         }
-      } catch (InterruptedException e) {
-        ParWork.propagateInterrupt(e);
       } finally {
-        prevSendPreRecoveryHttpUriRequest = null;
-        latch = null;
+        client.waitForOutstandingRequests();
       }
-
     }
   }
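
    sendPrepRecoveryCmd() above pairs an async request with a CountDownLatch
    and cancels the in-flight call on timeout. A rough equivalent of that
    wait-then-cancel shape with plain JDK types (sendPrepRequest() is a
    hypothetical stand-in; the 15-second bound mirrors the code above):

        ExecutorService pool = Executors.newSingleThreadExecutor();
        Future<?> inFlight = pool.submit(() -> sendPrepRequest());
        try {
          inFlight.get(15, TimeUnit.SECONDS);  // bounded wait, like latch.await(15, SECONDS)
        } catch (TimeoutException e) {
          inFlight.cancel(true);               // mirrors result.cancel() on timeout
          throw new IllegalStateException("Timeout waiting for prep recovery cmd on leader", e);
        } catch (Exception e) {                // InterruptedException / ExecutionException
          throw new RuntimeException(e);
        }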
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
index e3e52bc..bf575ae 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
@@ -33,8 +33,6 @@ import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
-import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.common.util.ObjectReleaseTracker;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrCore;
@@ -60,7 +58,6 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
   protected final String collection;
   protected final LeaderElector leaderElector;
 
-  private volatile boolean isClosed = false;
 
   private final ZkController zkController;
 
@@ -71,7 +68,6 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
                     + "/leader_elect/" + shardId,  ZkStateReader.getShardLeadersPath(
             collection, shardId), props,
             zkController.getZkClient());
-    assert ObjectReleaseTracker.track(this);
     this.cc = cc;
     this.syncStrategy = new SyncStrategy(cc);
     this.shardId = shardId;
@@ -81,26 +77,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
   }
 
   @Override
-  public void close() {
-    super.close();
-    IOUtils.closeQuietly(syncStrategy);
-    this.isClosed = true;
-    assert ObjectReleaseTracker.release(this);
-  }
-
-  @Override
   protected void cancelElection() throws InterruptedException, KeeperException {
     super.cancelElection();
-    String coreName = leaderProps.getName();
-    try {
-      try (SolrCore core = cc.getCore(coreName)) {
-        if (core != null) {
-          core.getCoreDescriptor().getCloudDescriptor().setLeader(false);
-        }
-      }
-    } catch (AlreadyClosedException e) {
-      // okay
-    }
   }
 
   @Override
@@ -172,27 +150,23 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 
         log.info("I may be the new leader - try and sync");
 
-        // nocommit
-        // we are going to attempt to be the leader
-        // first cancel any current recovery
-        core.getUpdateHandler().getSolrCoreState().cancelRecovery();
-
         PeerSync.PeerSyncResult result = null;
         boolean success = false;
 
         result = syncStrategy.sync(zkController, core, leaderProps, weAreReplacement);
+        log.warn("Sync strategy result {}", result);
         success = result.isSuccess();
 
-        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
-
         if (!success) {
 
-          log.warn("Our sync attempt failed ulog={}", ulog);
+          log.warn("Our sync attempt failed");
           boolean hasRecentUpdates = false;
+
+          UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
           if (ulog != null) {
             // TODO: we could optimize this if necessary
             try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
-              hasRecentUpdates = !recentUpdates.getVersions(1).isEmpty();
+              hasRecentUpdates = recentUpdates != null && !recentUpdates.getVersions(1).isEmpty();
             }
           }
 
@@ -206,7 +180,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
               rejoinLeaderElection(core);
               return;
             } else {
-              log.info("We failed sync, but we have no versions - we can't sync in that case - we were active before, so become leader anyway");
+              log.info("We failed sync, but we have no versions - we can't sync in that case - we did not find versions on other replicas, so become leader anyway");
               success = true;
             }
           }
@@ -214,20 +188,20 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
         log.info("Our sync attempt succeeded");
 
         // solrcloud_debug
-        if (log.isDebugEnabled()) {
-          try {
-            RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
-            SolrIndexSearcher searcher = searchHolder.get();
-            try {
-              log.debug(core.getCoreContainer().getZkController().getNodeName() + " synched " + searcher.count(new MatchAllDocsQuery()));
-            } finally {
-              searchHolder.decref();
-            }
-          } catch (Exception e) {
-            ParWork.propagateInterrupt(e);
-            throw new SolrException(ErrorCode.SERVER_ERROR, e);
-          }
-        }
+//        if (log.isDebugEnabled()) {
+//          try {
+//            RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
+//            SolrIndexSearcher searcher = searchHolder.get();
+//            try {
+//              log.debug(core.getCoreContainer().getZkController().getNodeName() + " synched " + searcher.count(new MatchAllDocsQuery()));
+//            } finally {
+//              searchHolder.decref();
+//            }
+//          } catch (Exception e) {
+//            ParWork.propagateInterrupt(e);
+//            throw new SolrException(ErrorCode.SERVER_ERROR, e);
+//          }
+//        }
         if (!success) {
           log.info("Sync with potential leader failed, rejoining election ...");
           rejoinLeaderElection(core);
@@ -257,15 +231,9 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 
         super.runLeaderProcess(context, weAreReplacement, 0);
 
-        assert shardId != null;
-
-        core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
-
         ZkNodeProps zkNodes = ZkNodeProps
             .fromKeyVals(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.CORE_NAME_PROP, leaderProps.getName(),
                 ZkStateReader.STATE_PROP, "leader");
-        assert zkController != null;
-        assert zkController.getOverseer() != null;
 
         log.info("I am the new leader, publishing as active: " + leaderProps.getCoreUrl() + " " + shardId);
 
@@ -278,9 +246,6 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
         throw e;
       } catch (Exception e) {
         SolrException.log(log, "There was a problem trying to register as the leader", e);
-
-        core.getCoreDescriptor().getCloudDescriptor().setLeader(false);
-
         // we could not publish ourselves as leader - try and rejoin election
 
         rejoinLeaderElection(core);
@@ -313,7 +278,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 
      // TODO: if we know everyone has already participated, we should bail early...
       
-      Thread.sleep(500L);
+      Thread.sleep(50L);
     }
   }
 
@@ -343,22 +308,15 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
     return false;
   }
 
-  public void publishActive(SolrCore core) throws Exception {
-    if (log.isDebugEnabled()) log.debug("publishing ACTIVE on becoming leader");
-    zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE, true, false);
-  }
-
   private void rejoinLeaderElection(SolrCore core)
           throws InterruptedException, KeeperException, IOException {
     // remove our ephemeral and re join the election
 
     log.info("There may be a better leader candidate than us - will cancel election, rejoin election, and kick off recovery");
 
-    cancelElection();
-
-    leaderElector.retryElection(this, false);
+    leaderElector.retryElection(false);
 
-    core.getUpdateHandler().getSolrCoreState().doRecovery(zkController.getCoreContainer(), core.getCoreDescriptor());
+    core.getUpdateHandler().getSolrCoreState().doRecovery(core);
   }
 
   public String getShardId() {
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
index 4d186f4..cbb5e3d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
@@ -57,25 +57,14 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   }
 
   @Override
-  public void close() {
-    this.closed = true;
-    try {
-      super.close();
-    } catch (Exception e) {
-      ParWork.propagateInterrupt(e);
-      log.error("Exception canceling election", e);
-    } finally {
-      leaderZkNodeParentVersion = null;
-    }
-  }
-
-  @Override
   protected void cancelElection() throws InterruptedException, KeeperException {
-    if (log.isDebugEnabled()) log.debug("cancelElection");
-    if (!zkClient.isConnected()) {
-      log.info("Can't cancel, zkClient is not connected");
-      return;
-    }
+
+
+    if (log.isTraceEnabled()) log.trace("cancelElection");
+//    if (!zkClient.isConnected()) {
+//      log.info("Can't cancel, zkClient is not connected");
+//      return;
+//    }
     super.cancelElection();
       try {
         if (leaderZkNodeParentVersion != null) {
@@ -105,36 +94,40 @@ class ShardLeaderElectionContextBase extends ElectionContext {
 
             int i = 0;
             List<OpResult> results = e.getResults();
-            for (OpResult result : results) {
-              if (((OpResult.ErrorResult) result).getErr() == -101) {
-                // no node, fine
-              } else {
-                if (result instanceof OpResult.ErrorResult) {
-                  OpResult.ErrorResult dresult = (OpResult.ErrorResult) result;
-                  if (dresult.getErr() != 0) {
-                    log.error("op=" + i++ + " err=" + dresult.getErr());
+            if (results != null) {
+              for (OpResult result : results) {
+                if (((OpResult.ErrorResult) result).getErr() == -101) {
+                  // no node, fine
+                } else {
+                  if (result instanceof OpResult.ErrorResult) {
+                    OpResult.ErrorResult dresult = (OpResult.ErrorResult) result;
+                    if (dresult.getErr() != 0) {
+                      log.error("op=" + i++ + " err=" + dresult.getErr());
+                    }
                   }
+                  throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election " + e.getPath(), e);
                 }
-                throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election " + e.getPath(), e);
               }
             }
 
           } catch (InterruptedException | AlreadyClosedException e) {
             ParWork.propagateInterrupt(e, true);
-            return;
           } catch (Exception e) {
             throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election", e);
           }
         } else {
           try {
             if (leaderSeqPath != null) {
+              if (log.isDebugEnabled()) log.debug("Delete leader seq election path {} path we watch is {}", leaderSeqPath, watchedSeqPath);
               zkClient.delete(leaderSeqPath, -1);
             }
           } catch (NoNodeException e) {
             // fine
           }
-          if (log.isDebugEnabled()) log.debug("No version found for ephemeral leader parent node, won't remove previous leader registration.");
+          if (log.isDebugEnabled()) log.debug("No version found for ephemeral leader parent node, won't remove previous leader registration. {}", leaderSeqPath);
         }
+        leaderSeqPath = null;
+
       } catch (Exception e) {
         if (e instanceof InterruptedException) {
           ParWork.propagateInterrupt(e);
@@ -176,7 +169,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
       ops.add(Op.setData(parent, null, -1));
       List<OpResult> results;
 
-      results = zkClient.multi(ops, false);
+      results = zkClient.multi(ops, true);
       log.info("Results from call {}", results);
       Iterator<Op> it = ops.iterator();
       for (OpResult result : results) {
@@ -191,6 +184,8 @@ class ShardLeaderElectionContextBase extends ElectionContext {
 
     } catch (NoNodeException e) {
       throw new AlreadyClosedException("No node exists for election");
+    } catch (KeeperException.NodeExistsException e) {
+      throw new AlreadyClosedException("Node already exists for election");
     } catch (Throwable t) {
       ParWork.propagateInterrupt(t);
       throw new SolrException(ErrorCode.SERVER_ERROR, "Could not register as the leader because creating the ephemeral registration node in ZooKeeper failed: " + errors, t);
diff --git a/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java b/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java
index 21d3c43..3b5f635 100644
--- a/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java
@@ -269,10 +269,10 @@ class SolrZkServerProps extends QuorumPeerConfig {
     boolean multiple = false;
     int port = 0;
     for (QuorumPeer.QuorumServer server : slist.values()) {
-      if (server.addr.getHostName().equals(myHost)) {
+      if (server.addr.getOne().getHostName().equals(myHost)) {
         multiple = me!=null;
         me = server.id;
-        port = server.addr.getPort();
+        port = server.addr.getOne().getPort();
       }
     }
 
@@ -292,7 +292,7 @@ class SolrZkServerProps extends QuorumPeerConfig {
     for (QuorumPeer.QuorumServer server : slist.values()) {
       if (server.addr.equals(thisAddr)) {
         if (clientPortAddress == null || clientPortAddress.getPort() <= 0)
-          setClientPort(server.addr.getPort() - 1);
+          setClientPort(server.addr.getOne().getPort() - 1);
         return server.id;
       }
     }
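
    The server.addr.getOne() calls above track ZooKeeper 3.6+, where
    QuorumServer.addr is a MultipleAddresses rather than a single
    InetSocketAddress, so one concrete address must be picked before reading
    the host or port:

        InetSocketAddress one = server.addr.getOne(); // ZK 3.6+: addr is MultipleAddresses
        String host = one.getHostName();
        int port = one.getPort();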
diff --git a/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java b/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java
index 6f98090..a735498 100644
--- a/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java
+++ b/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java
@@ -17,6 +17,7 @@
 package org.apache.solr.cloud;
 
 import org.apache.solr.common.ParWork;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.Utils;
@@ -27,6 +28,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.Closeable;
 import java.lang.invoke.MethodHandles;
+import java.util.Collections;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Future;
@@ -62,30 +64,38 @@ public class StatePublisher implements Closeable {
         ZkNodeProps bulkMessage = new ZkNodeProps();
         bulkMessage.getProperties().put("operation", "state");
         try {
-          message = workQueue.poll(5, TimeUnit.SECONDS);
+          try {
+            message = workQueue.poll(5, TimeUnit.SECONDS);
+          } catch (InterruptedException e) {
+            // interrupted while polling; fall through and let the loop re-check for termination
+          }
           if (message != null) {
             if (log.isDebugEnabled()) log.debug("Got state message " + message);
             if (message == TERMINATE_OP) {
-              return;
+              terminated = true;
+              message = null;
+            } else {
+              bulkMessage(message, bulkMessage);
             }
 
-            bulkMessage(message, bulkMessage);
-
             while (message != null) {
-              message = workQueue.poll(0, TimeUnit.SECONDS);
-              if (log.isDebugEnabled()) log.debug("Got state message " + message);
+              try {
+                message = workQueue.poll(30, TimeUnit.MILLISECONDS);
+              } catch (InterruptedException e) {
+                return;
+              }
+              if (log.isDebugEnabled()) log.debug("Got state message " + message);
               if (message != null) {
                 if (message == TERMINATE_OP) {
-                  return;
+                  terminated = true;
+                } else {
+                  bulkMessage(message, bulkMessage);
                 }
-                bulkMessage(message, bulkMessage);
               }
             }
             processMessage(bulkMessage);
           }
 
-        } catch (InterruptedException e) {
-          return;
         } catch (Exception e) {
           log.error("Exception in StatePublisher run loop", e);
           return;
@@ -94,19 +104,28 @@ public class StatePublisher implements Closeable {
     }
 
     private void bulkMessage(ZkNodeProps zkNodeProps, ZkNodeProps bulkMessage) throws KeeperException, InterruptedException {
-      if (zkNodeProps.getStr("operation").equals("DOWNNODE")) {
-        bulkMessage.getProperties().put("DOWNNODE", zkNodeProps.getStr(ZkStateReader.NODE_NAME_PROP));
+      if (zkNodeProps.getStr("operation").equals("downnode")) {
+        bulkMessage.getProperties().put("downnode", zkNodeProps.getStr(ZkStateReader.NODE_NAME_PROP));
       } else {
         String collection = zkNodeProps.getStr(ZkStateReader.COLLECTION_PROP);
         String core = zkNodeProps.getStr(ZkStateReader.CORE_NAME_PROP);
         String state = zkNodeProps.getStr(ZkStateReader.STATE_PROP);
 
+        if (collection == null || core == null || state == null) {
+          log.error("Bad state found for publish! {} {}", zkNodeProps, bulkMessage);
+          return;
+        }
+
         bulkMessage.getProperties().put(core, collection + "," + state);
       }
     }
 
     private void processMessage(ZkNodeProps message) throws KeeperException, InterruptedException {
-      overseerJobQueue.offer(Utils.toJSON(message));
+      // do it in a separate thread so that we can be stopped by interrupt without screwing up the ZooKeeper client
+      ParWork.getRootSharedExecutor().invokeAll(Collections.singletonList(() -> {
+        overseerJobQueue.offer(Utils.toJSON(message));
+        return null;
+      }));
     }
   }
 
@@ -121,11 +140,24 @@ public class StatePublisher implements Closeable {
       if (operation.equals("state")) {
         String core = stateMessage.getStr(ZkStateReader.CORE_NAME_PROP);
         String state = stateMessage.getStr(ZkStateReader.STATE_PROP);
+        if (core == null || state == null) {
+          log.error("Nulls in published state");
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Nulls in published state " + stateMessage);
+        }
+
         String lastState = stateCache.get(core);
         if (state.equals(lastState)) {
+          log.info("Skipping publish of state {} for {}; it matches the last state published", state, core);
           return;
         }
+
         stateCache.put(core, state);
+      } else if (operation.equalsIgnoreCase("downnode")) {
+        // nocommit - set all statecache entries for replica to DOWN
+
+      } else {
+        throw new IllegalArgumentException(stateMessage.toString());
       }
     }
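
    For orientation, the coalesced message built by bulkMessage() above has
    this shape (field values illustrative):

        // {
        //   "operation":  "state",
        //   "downnode":   "<nodeName>",            // present only if a downnode op was queued
        //   "<coreName>": "<collection>,<state>",  // one entry per queued per-core update
        //   ...
        // }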
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
index 8127586..4b123d4 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
@@ -17,13 +17,12 @@
 
 package org.apache.solr.cloud;
 
-import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.util.ObjectReleaseTracker;
 import org.apache.solr.core.CoreDescriptor;
 
-import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
 /**
  * Used to manage all ZkShardTerms of a collection
@@ -32,22 +31,19 @@ class ZkCollectionTerms implements AutoCloseable {
   private final String collection;
   private final Map<String, ZkShardTerms> terms;
   private final SolrZkClient zkClient;
-  private boolean closed;
+  private volatile boolean closed;
 
   ZkCollectionTerms(String collection, SolrZkClient client) {
     this.collection = collection;
-    this.terms = new HashMap<>();
+    this.terms = new ConcurrentHashMap<>();
     this.zkClient = client;
     assert ObjectReleaseTracker.track(this);
   }
 
 
-  public ZkShardTerms getShard(String shardId) {
+  ZkShardTerms getShard(String shardId) {
     synchronized (terms) {
       if (!terms.containsKey(shardId)) {
-        if (this.closed) {
-          throw new AlreadyClosedException();
-        }
         terms.put(shardId, new ZkShardTerms(collection, shardId, zkClient));
       }
       return terms.get(shardId);
@@ -55,25 +51,19 @@ class ZkCollectionTerms implements AutoCloseable {
   }
 
   public ZkShardTerms getShardOrNull(String shardId) {
-    synchronized (terms) {
-      if (!terms.containsKey(shardId)) return null;
-      return terms.get(shardId);
-    }
+    if (!terms.containsKey(shardId)) return null;
+    return terms.get(shardId);
   }
 
   public void register(String shardId, String coreNodeName) {
-    synchronized (terms)  {
-      getShard(shardId).registerTerm(coreNodeName);
-    }
+    getShard(shardId).registerTerm(coreNodeName);
   }
 
   public void remove(String shardId, CoreDescriptor coreDescriptor) {
-    synchronized (terms) {
-      ZkShardTerms zterms = getShardOrNull(shardId);
-      if (zterms != null) {
-        if (zterms.removeTerm(coreDescriptor)) {
-          terms.remove(shardId).close();
-        }
+    ZkShardTerms zterms = getShardOrNull(shardId);
+    if (zterms != null) {
+      if (zterms.removeTerm(coreDescriptor)) {
+        terms.remove(shardId).close();
       }
     }
   }
@@ -81,9 +71,20 @@ class ZkCollectionTerms implements AutoCloseable {
   public void close() {
     synchronized (terms) {
       this.closed = true;
+
       terms.values().forEach(ZkShardTerms::close);
+
+      terms.clear();
     }
     assert ObjectReleaseTracker.release(this);
   }
 
+  public boolean cleanUp() {
+    for (ZkShardTerms zkShardTerms : terms.values()) {
+      if (zkShardTerms.getTerms().size() > 0) {
+        return false;
+      }
+    }
+    return true;
+  }
 }
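
    With terms now a ConcurrentHashMap, the synchronized check-then-put kept in
    getShard() could equivalently be an atomic computeIfAbsent; a sketch of
    that alternative (not what this patch does):

        ZkShardTerms getShard(String shardId) {
          return terms.computeIfAbsent(shardId,
              id -> new ZkShardTerms(collection, id, zkClient));
        }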
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 17d7858..35d416d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -16,7 +16,6 @@
  */
 package org.apache.solr.cloud;
 
-import com.google.common.base.Strings;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.solr.client.solrj.cloud.DistributedLock;
 import org.apache.solr.client.solrj.cloud.LockListener;
@@ -110,7 +109,6 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
@@ -148,7 +146,6 @@ public class ZkController implements Closeable, Runnable {
   public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
   public final int WAIT_FOR_STATE = Integer.getInteger("solr.waitForState", 10);
 
-  private final boolean SKIP_AUTO_RECOVERY = Boolean.getBoolean("solrcloud.skip.autorecovery");
   private final int zkClientConnectTimeout;
   private final Supplier<List<CoreDescriptor>> descriptorsSupplier;
   private final ZkACLProvider zkACLProvider;
@@ -182,12 +179,10 @@ public class ZkController implements Closeable, Runnable {
     return dcCalled;
   }
 
-  public LeaderElector getShardLeaderElector(String name) {
-    return leaderElectors.get(name);
-  }
-
   public LeaderElector removeShardLeaderElector(String name) {
-    return leaderElectors.remove(name);
+    LeaderElector elector = leaderElectors.remove(name);
+    IOUtils.closeQuietly(elector);
+    return elector;
   }
 
   static class ContextKey {
@@ -231,25 +226,25 @@ public class ZkController implements Closeable, Runnable {
 
   private final Map<String, LeaderElector> leaderElectors = new ConcurrentHashMap<>(16);
 
-  private final Map<ContextKey, ElectionContext> electionContexts = new ConcurrentHashMap<>(16) {
-    @Override
-    public ElectionContext put(ContextKey key, ElectionContext value) {
-      if (ZkController.this.isClosed || cc.isShutDown()) {
-        throw new AlreadyClosedException();
-      }
-      return super.put(key, value);
-    }
-  };
+//  private final Map<ContextKey, ElectionContext> electionContexts = new ConcurrentHashMap<>(16) {
+//    @Override
+//    public ElectionContext put(ContextKey key, ElectionContext value) {
+//      if (ZkController.this.isClosed || cc.isShutDown()) {
+//        throw new AlreadyClosedException();
+//      }
+//      return super.put(key, value);
+//    }
+//  };
 
-  private final Map<ContextKey, ElectionContext> overseerContexts = new ConcurrentHashMap<>() {
-    @Override
-    public ElectionContext put(ContextKey key, ElectionContext value) {
-      if (ZkController.this.isClosed || cc.isShutDown()) {
-        throw new AlreadyClosedException();
-      }
-      return super.put(key, value);
-    }
-  };
+//  private final Map<ContextKey, ElectionContext> overseerContexts = new ConcurrentHashMap<>() {
+//    @Override
+//    public ElectionContext put(ContextKey key, ElectionContext value) {
+//      if (ZkController.this.isClosed || cc.isShutDown()) {
+//        throw new AlreadyClosedException();
+//      }
+//      return super.put(key, value);
+//    }
+//  };
 
   private volatile SolrZkClient zkClient;
   public volatile ZkStateReader zkStateReader;
@@ -271,7 +266,7 @@ public class ZkController implements Closeable, Runnable {
   private final Map<String, ReplicateFromLeader> replicateFromLeaders = new ConcurrentHashMap<>(132, 0.75f, 50);
   private final Map<String, ZkCollectionTerms> collectionToTerms = new ConcurrentHashMap<>(132, 0.75f, 50);
 
-  private final ReentrantLock collectionToTermsLock = new ReentrantLock(true);
+  private final ReentrantLock collectionToTermsLock = new ReentrantLock(false);
 
   // for now, this can be null in tests, in which case recovery will be inactive, and other features
   // may accept defaults or use mocks rather than pulling things from a CoreContainer
@@ -321,12 +316,10 @@ public class ZkController implements Closeable, Runnable {
   private class RegisterCoreAsync implements Callable<Object> {
 
     CoreDescriptor descriptor;
-    boolean recoverReloadedCores;
     boolean afterExpiration;
 
-    RegisterCoreAsync(CoreDescriptor descriptor, boolean recoverReloadedCores, boolean afterExpiration) {
+    RegisterCoreAsync(CoreDescriptor descriptor, boolean afterExpiration) {
       this.descriptor = descriptor;
-      this.recoverReloadedCores = recoverReloadedCores;
       this.afterExpiration = afterExpiration;
     }
 
@@ -334,8 +327,11 @@ public class ZkController implements Closeable, Runnable {
       if (log.isInfoEnabled()) {
         log.info("Registering core {} afterExpiration? {}", descriptor.getName(), afterExpiration);
       }
-      register(descriptor.getName(), descriptor, recoverReloadedCores, afterExpiration, false);
-      return descriptor;
+      if (cc.getLoadedCoreNames().contains(descriptor.getName())) {
+        register(descriptor.getName(), descriptor);
+        return descriptor;
+      }
+      return null;
     }
   }
 
@@ -416,26 +412,6 @@ public class ZkController implements Closeable, Runnable {
     assert ObjectReleaseTracker.track(this);
   }
 
-  public void closeLeaderContext(CoreDescriptor cd) {
-    String collection = cd.getCloudDescriptor().getCollectionName();
-    final String coreName = cd.getName();
-
-    ContextKey contextKey = new ContextKey(collection, coreName);
-    ElectionContext context = electionContexts.get(contextKey);
-    if (context != null) {
-      try {
-        context.cancelElection();
-      } catch (InterruptedException e) {
-        ParWork.propagateInterrupt(e);
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
-      } catch (KeeperException e) {
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
-      } finally {
-        context.close();
-      }
-    }
-  }
-
   public void start() {
     if (started) throw new IllegalStateException("Already started");
 
@@ -457,30 +433,10 @@ public class ZkController implements Closeable, Runnable {
 
       @Override
       public synchronized void command() {
-
-        try (ParWork worker = new ParWork("disconnected", true, true)) {
-          worker.collect("OverseerElectionContexts", overseerContexts.values());
-
-          worker.collect("Overseer", () -> {
-            if (ZkController.this.overseerElector != null) {
-              ZkController.this.overseerElector.getContext().close();
-            }
-          });
-          worker.collect("", () -> {
-            clearZkCollectionTerms();
-          });
-          worker.collect("electionContexts", electionContexts.values());
-          worker.collect("",() -> {
-            markAllAsNotLeader(descriptorsSupplier);
-          });
-          worker.collect("",() -> {
-            cc.cancelCoreRecoveries(true, false);
-          });
-        }
+        clearZkCollectionTerms();
       }
     });
-    overseerContexts.clear();
-    electionContexts.clear();
+
     zkClient.setAclProvider(zkACLProvider);
     zkClient.getConnectionManager().setOnReconnect(new OnReconnect() {
 
@@ -491,37 +447,30 @@ public class ZkController implements Closeable, Runnable {
             log.info("skipping zk reconnect logic due to shutdown");
             return;
           }
-          log.info("ZooKeeper session re-connected ... refreshing core states after session expiration.");
-          try {
-            // recreate our watchers first so that they exist even on any problems below
-            zkStateReader.createClusterStateWatchersAndUpdate();
-
-            // this is troublesome - we dont want to kill anything the old
-            // leader accepted
-            // though I guess sync will likely get those updates back? But
-            // only if
-            // he is involved in the sync, and he certainly may not be
-            // ExecutorUtil.shutdownAndAwaitTermination(cc.getCmdDistribExecutor());
-            // we need to create all of our lost watches
-
-            // seems we dont need to do this again...
-            // Overseer.createClientNodes(zkClient, getNodeName());
-
+          ParWork.getRootSharedExecutor().submit(() -> {
+            log.info("ZooKeeper session re-connected ... refreshing core states after session expiration.");
+            try {
+              // recreate our watchers first so that they exist even on any problems below
+                zkStateReader.createClusterStateWatchersAndUpdate();
 
-            // start the overseer first as following code may need it's processing
+              // this is troublesome - we don't want to kill anything the old
+              // leader accepted
+              // though I guess sync will likely get those updates back? But
+              // only if the old leader is involved in the sync, and it
+              // certainly may not be
+              // ExecutorUtil.shutdownAndAwaitTermination(cc.getCmdDistribExecutor());
+              // we need to create all of our lost watches
 
-            ElectionContext context = getOverseerContext();
+              // seems we dont need to do this again...
+              // Overseer.createClientNodes(zkClient, getNodeName());
 
-            overseerElector.setup(context);
-            overseerElector.joinElection(context, true);
+              // start the overseer first as the following code may need its processing
 
+              overseerElector.retryElection(false);
 
-            // we have to register as live first to pick up docs in the buffer
-            createEphemeralLiveNode();
+              List<CoreDescriptor> descriptors = descriptorsSupplier.get();
+              // re register all descriptors
 
-            List<CoreDescriptor> descriptors = descriptorsSupplier.get();
-            // re register all descriptors
-            try (ParWork parWork = new ParWork(this, true, true)) {
               if (descriptors != null) {
                 for (CoreDescriptor descriptor : descriptors) {
                   // TODO: we need to think carefully about what happens when it
@@ -533,40 +482,45 @@ public class ZkController implements Closeable, Runnable {
                     // unload solrcores that have been 'failed over'
                     // throwErrorIfReplicaReplaced(descriptor);
 
-                    parWork.collect(new RegisterCoreAsync(descriptor, true, true));
+                    ParWork.getRootSharedExecutor().submit(new RegisterCoreAsync(descriptor, true));
 
                   } catch (Exception e) {
                     SolrException.log(log, "Error registering SolrCore", e);
                   }
                 }
               }
-            }
 
-            // notify any other objects that need to know when the session was re-connected
+              // notify any other objects that need to know when the session was re-connected
 
-            try (ParWork parWork = new ParWork(this, true, true)) {
               // the OnReconnect operation can be expensive per listener, so do that async in the background
-              reconnectListeners.forEach(listener -> {
-                try {
-                  parWork.collect(new OnReconnectNotifyAsync(listener));
-                } catch (Exception exc) {
-                  // not much we can do here other than warn in the log
-                  log.warn("Error when notifying OnReconnect listener {} after session re-connected.", listener, exc);
-                }
-              });
+              try (ParWork work = new ParWork(this, true, true)) {
+                reconnectListeners.forEach(listener -> {
+                  try {
+                    work.collect(new OnReconnectNotifyAsync(listener));
+                  } catch (Exception exc) {
+                    // not much we can do here other than warn in the log
+                    log.warn("Error when notifying OnReconnect listener {} after session re-connected.", listener, exc);
+                  }
+                });
+              }
+
+              createEphemeralLiveNode();
+
+            } catch (AlreadyClosedException e) {
+              log.info("Already closed");
+              return;
+            } catch (Exception e) {
+              SolrException.log(log, "", e);
             }
-          } catch (InterruptedException e) {
-            log.warn("interrupted", e);
-          } catch (SessionExpiredException e) {
-            log.warn("SessionExpiredException", e);
-          } catch (AlreadyClosedException e) {
-            log.info("Already closed");
-            return;
-          } catch (Exception e) {
-            SolrException.log(log, "", e);
-          }
+          });
+
         }
       }
+
+      @Override
+      public String getName() {
+        return "ZkController";
+      }
     });
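
The reconnect path above now fans each core re-registration out to a shared executor instead of batching it through a single ParWork instance. A minimal sketch of that fan-out pattern, assuming a plain ExecutorService in place of ParWork's root shared executor (the names here are illustrative, not the actual Solr API):

    import java.util.List;
    import java.util.concurrent.ExecutorService;

    // Hypothetical stand-in for the re-registration fan-out: every descriptor
    // is submitted independently so one failing core cannot stop the rest.
    static void reRegisterAll(ExecutorService pool, List<Runnable> registerTasks) {
      for (Runnable task : registerTasks) {
        pool.submit(() -> {
          try {
            task.run();
          } catch (Exception e) {
            // same spirit as SolrException.log(log, "Error registering SolrCore", e)
            System.err.println("Error registering SolrCore: " + e);
          }
        });
      }
    }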
 
     zkClient.setIsClosed(new ConnectionManager.IsClosed() {
@@ -576,36 +530,27 @@ public class ZkController implements Closeable, Runnable {
         return cc.isShutDown();
       }});
     zkClient.setDisconnectListener(() -> {
-      if (isClosed()) return;
-        try (ParWork worker = new ParWork("disconnected", true, true)) {
-          if (zkClient.isConnected()) {
-            worker.collect(ZkController.this.overseerContexts);
-          } else {
-            worker.collect(ZkController.this.overseer);
-          }
-          worker.collect("clearZkCollectionTerms", () -> {
-            clearZkCollectionTerms();
-          });
-          if (zkClient.isConnected()) {
-            worker.collect(electionContexts.values());
+      try (ParWork worker = new ParWork("disconnected", true, true)) {
+        worker.collect(ZkController.this.overseerElector);
+        worker.collect(ZkController.this.overseer);
+
+        worker.collect("clearZkCollectionTerms", () -> {
+          clearZkCollectionTerms();
+        });
+        if (zkClient.isAlive()) {
+          synchronized (leaderElectors) {
+            worker.collect(leaderElectors.values());
           }
-          worker.collect("markAllAsNotLeader", () -> {
-            markAllAsNotLeader(descriptorsSupplier);
-          });
         }
+      }
+
     });
     init();
   }
 
   private ElectionContext getOverseerContext() {
     ElectionContext context = new OverseerElectionContext(getNodeName(), zkClient, overseer);
-    ElectionContext prevContext = overseerContexts.put(new ContextKey("overseer", "overseer"), context);
-    if (prevContext != null) {
-      prevContext.close();
-    }
-    if (overseerElector != null) {
-      ParWork.close(overseerElector.getContext());
-    }
+
     return context;
   }
 
@@ -660,7 +605,6 @@ public class ZkController implements Closeable, Runnable {
     List<CoreDescriptor> descriptors = registerOnReconnect.get();
     if (descriptors != null) {
       for (CoreDescriptor descriptor : descriptors) {
-        descriptor.getCloudDescriptor().setLeader(false);
         descriptor.getCloudDescriptor().setHasRegistered(false);
       }
     }
@@ -670,12 +614,21 @@ public class ZkController implements Closeable, Runnable {
     if (log.isDebugEnabled()) log.debug("disconnect");
     this.dcCalled = true;
     try (ParWork closer = new ParWork(this, true, true)) {
-      closer.collect( "replicateFromLeaders", replicateFromLeaders);
+      closer.collect("replicateFromLeaders", replicateFromLeaders);
 
       if (getZkClient().getConnectionManager().isConnected()) {
+        closer.collect("removeEphemeralLiveNode", () -> {
+          try {
+            removeEphemeralLiveNode();
+          } catch (Exception e) {
+            ParWork.propagateInterrupt("Error Removing ephemeral live node. Continuing to close CoreContainer", e);
+          }
+          return "RemoveEphemNode";
+
+        });
+
         if (publishDown) {
           closer.collect("PublishNodeAsDown&RepFromLeaders", () -> {
-
             try {
               log.info("Publish this node as DOWN...");
               publishNodeAsDown(getNodeName());
@@ -683,19 +636,40 @@ public class ZkController implements Closeable, Runnable {
               ParWork.propagateInterrupt("Error publishing nodes as down. Continuing to close CoreContainer", e);
             }
             return "PublishDown";
-
           });
+          closer.collect();
         }
-        closer.collect("removeEphemeralLiveNode", () -> {
-          try {
-            removeEphemeralLiveNode();
-          } catch (Exception e) {
-            ParWork.propagateInterrupt("Error Removing ephemeral live node. Continuing to close CoreContainer", e);
-          }
-          return "RemoveEphemNode";
+      }
 
+      synchronized (leaderElectors) {
+        closer.collect(leaderElectors);
+      }
+
+      closer.collect(overseerElector);
+
+      if (overseer != null) {
+        closer.collect("", () -> {
+          overseer.closeAndDone();
         });
       }
+      closer.collect(sysPropsCacher);
+      closer.collect(cloudManager);
+      closer.collect(cloudSolrClient);
+
+      closer.collect("", () -> {
+        try {
+          if (statePublisher != null) {
+            statePublisher.submitState(StatePublisher.TERMINATE_OP);
+          }
+        } catch (Exception e) {
+          log.error("Exception closing state publisher");
+        }
+      });
+
+    } finally {
+      synchronized (leaderElectors) {
+        leaderElectors.clear();
+      }
     }
   }
 
@@ -706,42 +680,20 @@ public class ZkController implements Closeable, Runnable {
     if (log.isDebugEnabled()) log.debug("Closing ZkController");
     //assert closeTracker.close();
 
-    IOUtils.closeQuietly(overseerElector);
-
-    leaderElectors.forEach((s, leaderElector) -> IOUtils.closeQuietly(leaderElector));
-
-    if (overseer != null) {
-      overseer.closeAndDone();
-    }
-
     this.shudownCalled = true;
 
     this.isClosed = true;
-
-
-    try {
-      if (statePublisher != null) {
-        statePublisher.submitState(StatePublisher.TERMINATE_OP);
-      }
-      IOUtils.closeQuietly(statePublisher);
-    } catch (Exception e) {
-      log.error("Exception closing state publisher");
-    }
-
-
     try (ParWork closer = new ParWork(this, true, true)) {
-      closer.collect(replicateFromLeaders);
-      closer.collect(electionContexts);
-      closer.collect(collectionToTerms);
-      closer.collect(sysPropsCacher);
-      closer.collect(cloudManager);
-      closer.collect(cloudSolrClient);
-      closer.collect(overseerContexts);
+      synchronized (leaderElectors) {
+        closer.collect(leaderElectors);
+      }
+      collectionToTerms.forEach((s, zkCollectionTerms) -> closer.collect(zkCollectionTerms));
     }
 
     IOUtils.closeQuietly(zkStateReader);
 
     if (closeZkClient) {
+      zkClient.disableCloseLock();
       IOUtils.closeQuietly(zkClient);
     }
 
@@ -771,7 +723,7 @@ public class ZkController implements Closeable, Runnable {
     int numActiveReplicas = shard.getReplicas(
         rep -> rep.getState() == Replica.State.ACTIVE
             && rep.getType() != Type.PULL
-            && getClusterState().getLiveNodes().contains(rep.getNodeName())
+            && getZkStateReader().getLiveNodes().contains(rep.getNodeName())
     ).size();
 
     // at least the leader still be able to search, we should give up leadership if other replicas can take over
@@ -1136,9 +1088,9 @@ public class ZkController implements Closeable, Runnable {
             }
             createdClusterNodes = true;
           } else {
-            if (log.isDebugEnabled()) log.debug("Cluster zk nodes already exist");
-            int currentLiveNodes = zkClient.getChildren(ZkStateReader.LIVE_NODES_ZKNODE, null, true).size();
-            if (log.isDebugEnabled()) log.debug("Current live nodes {}", currentLiveNodes);
+            //if (log.isDebugEnabled()) log.debug("Cluster zk nodes already exist");
+            //int currentLiveNodes = zkClient.getChildren(ZkStateReader.LIVE_NODES_ZKNODE, null, true).size();
+            //if (log.isDebugEnabled()) log.debug("Current live nodes {}", currentLiveNodes);
             //          if (currentLiveNodes == 0) {
             //            log.info("Delete Overseer queues");
             //            // cluster is in a startup state, clear zk queues
@@ -1203,6 +1155,7 @@ public class ZkController implements Closeable, Runnable {
       zkStateReader = new ZkStateReader(zkClient, () -> {
         if (cc != null) cc.securityNodeChanged();
       });
+      zkStateReader.setCollectionRemovedListener(collection -> removeCollectionTerms(collection));
       this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
 
       zkStateReader.createClusterStateWatchersAndUpdate();
@@ -1216,25 +1169,18 @@ public class ZkController implements Closeable, Runnable {
       this.overseerCollectionQueue = overseer.getCollectionQueue(zkClient);
       this.overseerConfigSetQueue = overseer.getConfigSetQueue(zkClient);
       this.sysPropsCacher = new NodesSysPropsCacher(getSolrCloudManager().getNodeStateProvider(), getNodeName(), zkStateReader);
-      overseerElector = new LeaderElector(this, new ContextKey("overseer", "overseer"), overseerContexts);
-      try (ParWork worker = new ParWork(this, false, true)) {
+      overseerElector = new LeaderElector(this, new ContextKey("overseer", "overseer"));
+      //try (ParWork worker = new ParWork(this, false, true)) {
         // start the overseer first as following code may need it's processing
-        worker.collect("startOverseer", () -> {
+       // worker.collect("startOverseer", () -> {
           ElectionContext context = getOverseerContext();
           if (log.isDebugEnabled()) log.debug("Overseer setting up context {}", context.leaderProps.getNodeName());
           overseerElector.setup(context);
-          try {
-            log.info("Overseer joining election {}", context.leaderProps.getNodeName());
-            overseerElector.joinElection(context, false);
-          } catch (KeeperException e) {
-            throw new SolrException(ErrorCode.SERVER_ERROR, e);
-          } catch (InterruptedException e) {
-            ParWork.propagateInterrupt(e);
-            throw new SolrException(ErrorCode.SERVER_ERROR, e);
-          } catch (IOException e) {
-            throw new SolrException(ErrorCode.SERVER_ERROR, e);
-          }
-        });
+
+          log.info("Overseer joining election {}", context.leaderProps.getNodeName());
+          overseerElector.joinElection(false);
+
+       // });
 
         //          worker.collect("publishDownState", () -> {
         //            try {
@@ -1249,14 +1195,12 @@ public class ZkController implements Closeable, Runnable {
         //              throw new SolrException(ErrorCode.SERVER_ERROR, e);
         //            }
         //          });
-      }
+      //}
       statePublisher = new StatePublisher(overseerJobQueue);
       statePublisher.start();
 
-      // Do this last to signal we're up.
-      createEphemeralLiveNode();
-
-      //  publishAndWaitForDownStates();
+      // nocommit
+      //publishDownStates();
     } catch (InterruptedException e) {
       ParWork.propagateInterrupt(e);
       throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
@@ -1330,7 +1274,7 @@ public class ZkController implements Closeable, Runnable {
     return zkClient.isConnected();
   }
 
-  private void createEphemeralLiveNode() {
+  public void createEphemeralLiveNode() {
     String nodeName = getNodeName();
     String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName;
 
@@ -1340,19 +1284,14 @@ public class ZkController implements Closeable, Runnable {
   }
 
   private void createLiveNodeImpl(String nodePath) {
-    Map<String, byte[]> dataMap = new HashMap<>(2);
-    Map<String, CreateMode> createModeMap = new HashMap<>(2);
-    dataMap.put(nodePath, null);
-    createModeMap.put(nodePath, CreateMode.EPHEMERAL);
     try {
-      zkClient.setData(ZkStateReader.LIVE_NODES_ZKNODE, (byte[]) null, true);
       try {
         zkClient.getSolrZooKeeper().create(nodePath, null, zkClient.getZkACLProvider().getACLsToAdd(nodePath), CreateMode.EPHEMERAL);
+        zkClient.setData(ZkStateReader.LIVE_NODES_ZKNODE, (byte[]) null, true);
       } catch (KeeperException.NodeExistsException e) {
-        log.warn("Found our ephemeral live node already exists. This must be a quick restart after a hard shutdown, waiting for it to expire {}", nodePath);
+        log.warn("Found our ephemeral live node already exists. This must be a quick restart after a hard shutdown ... {}", nodePath);
         // TODO nocommit wait for expiration properly and try again?
-        Thread.sleep(15000);
-        zkClient.getSolrZooKeeper().create(nodePath, null, zkClient.getZkACLProvider().getACLsToAdd(nodePath), CreateMode.EPHEMERAL);
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
       }
     } catch (Exception e) {
       ParWork.propagateInterrupt(e);
@@ -1364,8 +1303,8 @@ public class ZkController implements Closeable, Runnable {
     String nodeName = getNodeName();
     String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName;
     try {
-      zkClient.setData(ZkStateReader.LIVE_NODES_ZKNODE, (byte[]) null, true);
       zkClient.delete(nodePath, -1);
+      zkClient.setData(ZkStateReader.LIVE_NODES_ZKNODE, (byte[]) null, true);
     } catch (NoNodeException e) {
       // okay
     }
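
The two hunks above flip the ordering around the /live_nodes parent: the ephemeral child is created (or deleted) first, and the parent znode is touched second, so watchers triggered by the parent's data change already observe the updated child list. A standalone sketch of the create side against a raw ZooKeeper client, assuming zk is already connected:

    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.KeeperException;
    import org.apache.zookeeper.ZooDefs;
    import org.apache.zookeeper.ZooKeeper;

    // Sketch: publish liveness as an ephemeral child, then touch the parent
    // so watchers observe the already-updated child list.
    static void publishLive(ZooKeeper zk, String nodeName) throws Exception {
      String parent = "/live_nodes";
      String path = parent + "/" + nodeName;
      try {
        zk.create(path, null, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
      } catch (KeeperException.NodeExistsException e) {
        // a previous session's ephemeral has not expired yet; fail fast,
        // mirroring the SERVER_ERROR thrown above
        throw new IllegalStateException("live node already exists: " + path, e);
      }
      zk.setData(parent, null, -1); // touch the parent as the change signal
    }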
@@ -1389,10 +1328,12 @@ public class ZkController implements Closeable, Runnable {
 //        new UnloadCoreOnDeletedWatcher(shardId, name));
   }
 
-  public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores,
-      boolean afterExpiration, boolean skipRecovery) throws Exception {
+  public String register(String coreName, final CoreDescriptor desc) throws Exception {
     try (SolrCore core = cc.getCore(coreName)) {
-     return register(core, desc, recoverReloadedCores, afterExpiration, skipRecovery);
+      if (core == null || core.isClosing() || getCoreContainer().isShutDown()) {
+        throw new AlreadyClosedException();
+      }
+     return register(core, desc, false);
     }
   }
 
@@ -1401,57 +1342,89 @@ public class ZkController implements Closeable, Runnable {
    *
    * @return the shardId for the SolrCore
    */
-  private String register(SolrCore core, final CoreDescriptor desc, boolean recoverReloadedCores,
-                         boolean afterExpiration, boolean skipRecovery) throws Exception {
+  private String register(SolrCore core, final CoreDescriptor desc, boolean afterExpiration) throws Exception {
+    if (getCoreContainer().isShutDown()) {
+      throw new AlreadyClosedException();
+    }
     MDCLoggingContext.setCoreDescriptor(cc, desc);
+    String coreName = core.getName();
+    LeaderElector leaderElector = leaderElectors.get(coreName);
+    if (core.isClosing() || cc.isShutDown() || (leaderElector != null && leaderElector.isClosed())) {
+      throw new AlreadyClosedException();
+    }
+
+    boolean success = false;
     try {
       // pre register has published our down state
-      String coreName = core.getName();
+
       final String baseUrl = getBaseUrl();
       final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
       final String collection = cloudDesc.getCollectionName();
       final String shardId = cloudDesc.getShardId();
-      log.info("Register SolrCore, core={} baseUrl={} collection={}, shard={} skipRecovery={}", coreName, baseUrl, collection, shardId, skipRecovery);
+
+      // the watcher is added to a set so multiple calls of this method will leave only one watcher
+      getZkStateReader().registerCore(cloudDesc.getCollectionName());
+
+      log.info("Register SolrCore, core={} baseUrl={} collection={}, shard={} skipRecovery={}", coreName, baseUrl, collection, shardId);
       AtomicReference<DocCollection> coll = new AtomicReference<>();
+      AtomicReference<Replica> replicaRef = new AtomicReference<>();
       try {
-        zkStateReader.waitForState(collection, Integer.getInteger("solr.zkregister.leaderwait", 60000), TimeUnit.MILLISECONDS, (l, c) -> { // nocommit timeout
-//          if (isClosed()) {
-//            throw new AlreadyClosedException();
-//          }
-
-          coll.set(c);
+        log.info("Waiting to see our entry in state.json {}", desc.getName());
+        zkStateReader.waitForState(collection, Integer.getInteger("solr.zkregister.leaderwait", 5000), TimeUnit.MILLISECONDS, (l, c) -> { // nocommit timeout
           if (c == null) {
             return false;
           }
-          if (c.getReplica(coreName) != null) {
+          coll.set(c);
+          Replica r = c.getReplica(coreName);
+          if (r != null) {
+            replicaRef.set(r);
             return true;
           }
           return false;
         });
       } catch (TimeoutException e) {
-        throw new TimeoutException("Timeout waiting to see core " + coreName  + " \n" + coll.get());
+        log.warn("Timeout waiting to see core " + coreName + " \ncollection=" + collection + " " + coll.get());
       }
 
-      Replica replica = getReplicaOrNull(zkStateReader.getClusterState().getCollectionOrNull(collection), shardId, coreName);
+      Replica replica = replicaRef.get();
+      
       if (replica == null) {
-        throw new SolrException(ErrorCode.SERVER_ERROR, "Error registering SolrCore, replica is removed from clusterstate");
+        replica = zkStateReader.getClusterState().getCollection(collection).getReplica(coreName);
+        if (replica == null) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, "Error registering SolrCore, replica is removed from clusterstate \n" + zkStateReader.getClusterState().getCollectionOrNull(collection));
+        }
       }
-
+      ZkShardTerms shardTerms = null;
       if (replica.getType() != Type.PULL) {
         log.info("Register terms for replica {}", coreName);
-        getCollectionTerms(collection).register(cloudDesc.getShardId(), coreName);
+        createCollectionTerms(collection).register(cloudDesc.getShardId(), coreName);
+        shardTerms = getShardTermsOrNull(collection, cloudDesc.getShardId());
       }
 
-      ZkShardTerms shardTerms = getShardTerms(collection, cloudDesc.getShardId());
+
 
       log.info("Register replica - core:{} address:{} collection:{} shard:{} type={}", coreName, baseUrl, collection, shardId, replica.getType());
+      synchronized (leaderElectors) {
+        leaderElector = leaderElectors.get(replica.getName());
+        if (leaderElector == null) {
+          ContextKey contextKey = new ContextKey(collection, coreName);
+          leaderElector = new LeaderElector(this, contextKey);
+          if (cc.isShutDown()) {
+            leaderElector.close();
+            throw new AlreadyClosedException();
+          }
+          LeaderElector oldElector = leaderElectors.put(replica.getName(), leaderElector);
+          IOUtils.closeQuietly(oldElector);
+        }
+      }
+
       //
       try {
         // If we're a preferred leader, insert ourselves at the head of the queue
         boolean joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
         if (replica.getType() != Type.PULL) {
           // nocommit review
-          joinElection(desc, afterExpiration, joinAtHead);
+          joinElection(desc, joinAtHead);
         } else if (replica.getType() == Type.PULL) {
           if (joinAtHead) {
             log.warn("Replica {} was designated as preferred leader but it's type is {}, It won't join election", coreName, Type.PULL);
@@ -1468,88 +1441,91 @@ public class ZkController implements Closeable, Runnable {
 
       if (log.isDebugEnabled()) log.debug("Wait to see leader for {}, {}", collection, shardId);
       Replica leader = null;
-      for (int i = 0; i < 15; i++) {
+      for (int i = 0; i < 30; i++) {
         try {
-          if (isClosed()) {
+          if (getCoreContainer().isShutDown()) {
             throw new AlreadyClosedException();
           }
 
-          leader = zkStateReader.getLeaderRetry(collection, shardId, 3000);
+          leader = zkStateReader.getLeaderRetry(collection, shardId, 1000);
           break;
         } catch (TimeoutException timeoutException) {
 
         }
       }
 
+      if (leader == null) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, "No leader found while trying to register " + coreName + " with zookeeper");
+      }
+
       String ourUrl = replica.getCoreUrl();
-      boolean isLeader = leader.getName() .equals(coreName);
+      boolean isLeader = leader.getName().equals(coreName);
 
       log.info("We are {} and leader is {} isLeader={}", ourUrl, leader.getCoreUrl(), isLeader);
 
       log.info("Check if we should recover isLeader={}", isLeader);
       //assert !(isLeader && replica.getType() == Type.PULL) : "Pull replica became leader!";
 
-      try {
-
-        // recover from local transaction log and wait for it to complete before
-        // going active
-        // TODO: should this be moved to another thread? To recoveryStrat?
-        // TODO: should this actually be done earlier, before (or as part of)
-        // leader election perhaps?
-
-        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
-        boolean isTlogReplicaAndNotLeader = replica.getType() == Replica.Type.TLOG && !isLeader;
-        if (isTlogReplicaAndNotLeader) {
-          String commitVersion = ReplicateFromLeader.getCommitVersion(core);
-          if (commitVersion != null) {
-            ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
-          }
+      // recover from local transaction log and wait for it to complete before
+      // going active
+      // TODO: should this be moved to another thread? To recoveryStrat?
+      // TODO: should this actually be done earlier, before (or as part of)
+      // leader election perhaps?
+
+      UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
+      boolean isTlogReplicaAndNotLeader = replica.getType() == Replica.Type.TLOG && !isLeader;
+      if (isTlogReplicaAndNotLeader) {
+        String commitVersion = ReplicateFromLeader.getCommitVersion(core);
+        if (commitVersion != null) {
+          ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
         }
-        // we will call register again after zk expiration and on reload
-        if (!afterExpiration && !core.isReloaded() && ulog != null && !isTlogReplicaAndNotLeader) {
-          // disable recovery in case shard is in construction state (for shard splits)
-          Slice slice = getClusterState().getCollection(collection).getSlice(shardId);
-          if (slice.getState() != Slice.State.CONSTRUCTION || !isLeader) {
-            Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler().getUpdateLog().recoverFromLog();
-            if (recoveryFuture != null) {
-              log.info("Replaying tlog for {} during startup... NOTE: This can take a while.", core);
-              recoveryFuture.get(); // NOTE: this could potentially block for
-              // minutes or more!
-              // TODO: public as recovering in the mean time?
-              // TODO: in the future we could do peersync in parallel with recoverFromLog
-            } else {
-              if (log.isDebugEnabled()) {
-                log.debug("No LogReplay needed for core={} baseURL={}", core.getName(), baseUrl);
-              }
+      }
+      // we will call register again after zk expiration and on reload
+      if (!afterExpiration && ulog != null && !isTlogReplicaAndNotLeader) {
+        // disable recovery in case shard is in construction state (for shard splits)
+        Slice slice = getClusterState().getCollection(collection).getSlice(shardId);
+        if (slice.getState() != Slice.State.CONSTRUCTION || !isLeader) {
+          Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler().getUpdateLog().recoverFromLog();
+          if (recoveryFuture != null) {
+            log.info("Replaying tlog for {} during startup... NOTE: This can take a while.", core);
+            recoveryFuture.get(); // NOTE: this could potentially block for
+            // minutes or more!
+            // TODO: public as recovering in the mean time?
+            // TODO: in the future we could do peersync in parallel with recoverFromLog
+          } else {
+            if (log.isDebugEnabled()) {
+              log.debug("No LogReplay needed for core={} baseURL={}", core.getName(), baseUrl);
             }
           }
         }
-        boolean didRecovery = checkRecovery(recoverReloadedCores, isLeader, skipRecovery, collection, coreName, shardId, core, cc, afterExpiration);
+      }
 
-        if (!didRecovery) {
-          if (isTlogReplicaAndNotLeader) {
-            startReplicationFromLeader(coreName, true);
-          }
+      boolean didRecovery = checkRecovery(isLeader, collection, coreName, shardId, core, cc);
+
+      if (!didRecovery) {
+        if (isTlogReplicaAndNotLeader) {
+          startReplicationFromLeader(coreName, true);
         }
 
-        if (replica.getType() != Type.PULL) {
-          // the watcher is added to a set so multiple calls of this method will left only one watcher
-          shardTerms.addListener(new RecoveringCoreTermWatcher(core.getCoreDescriptor(), getCoreContainer()));
+        if (!isLeader) {
+          publish(desc, Replica.State.ACTIVE, true);
         }
-        desc.getCloudDescriptor().setHasRegistered(true);
+      }
 
+      if (replica.getType() != Type.PULL) {
         // the watcher is added to a set so multiple calls of this method will left only one watcher
-        getZkStateReader().registerCore(cloudDesc.getCollectionName());
-        // the watcher is added to a set so multiple calls of this method will left only one watcher
-        // nocommit
-        registerUnloadWatcher(cloudDesc.getCollectionName(), cloudDesc.getShardId(), desc.getName());
-
-      } catch (Exception e) {
-        SolrZkClient.checkInterrupted(e);
-        unregister(coreName, desc, false);
-        throw e;
+        if (log.isDebugEnabled()) log.debug("add shard terms listener for {}", coreName);
+        shardTerms.addListener(new RecoveringCoreTermWatcher(core.getCoreDescriptor(), getCoreContainer()));
       }
+
+      desc.getCloudDescriptor().setHasRegistered(true);
+
+      // the watcher is added to a set so multiple calls of this method will leave only one watcher
+      // nocommit
+      registerUnloadWatcher(cloudDesc.getCollectionName(), cloudDesc.getShardId(), desc.getName());
+
       log.info("SolrCore Registered, core{} baseUrl={} collection={}, shard={}", coreName, baseUrl, collection, shardId);
+      success = true;
       return shardId;
     } finally {
       MDCLoggingContext.clear();
@@ -1571,7 +1547,9 @@ public class ZkController implements Closeable, Runnable {
     stopReplicationFromLeader(coreName);
 
     ReplicateFromLeader replicateFromLeader = new ReplicateFromLeader(cc, coreName);
-
+    if (isDcCalled() || isClosed || cc.isShutDown()) {
+      return;
+    }
     ReplicateFromLeader prev = replicateFromLeaders.putIfAbsent(coreName, replicateFromLeader);
     if (prev == null) {
       replicateFromLeader.startReplication(switchTransactionLog);
@@ -1582,7 +1560,7 @@ public class ZkController implements Closeable, Runnable {
       } catch (Exception e) {
         ParWork.propagateInterrupt("Error closing previous replication attempt", e);
       }
-      if (isClosed()) throw new AlreadyClosedException();
+     // if (isClosed()) throw new AlreadyClosedException();
       replicateFromLeader.startReplication(switchTransactionLog);
     }
 
@@ -1672,20 +1650,12 @@ public class ZkController implements Closeable, Runnable {
   }
 
 
-  private boolean joinElection(CoreDescriptor cd, boolean afterExpiration, boolean joinAtHead)
+  private void joinElection(CoreDescriptor cd, boolean joinAtHead)
       throws InterruptedException, KeeperException, IOException {
     log.info("joinElection {}", cd.getName());
     // look for old context - if we find it, cancel it
     String collection = cd.getCloudDescriptor().getCollectionName();
 
-    ContextKey contextKey = new ContextKey(collection, cd.getName());
-
-    ElectionContext prevContext = electionContexts.get(contextKey);
-
-    if (prevContext != null) {
-      prevContext.close();
-    }
-
     String shardId = cd.getCloudDescriptor().getShardId();
 
     Map<String, Object> props = new HashMap<>();
@@ -1694,46 +1664,39 @@ public class ZkController implements Closeable, Runnable {
 
     Replica replica = new Replica(cd.getName(), props, collection, shardId, zkStateReader);
     LeaderElector leaderElector;
+
     synchronized (leaderElectors) {
-      leaderElector = leaderElectors.get(replica.getName());
+      leaderElector = leaderElectors.get(replica.getName());
       if (leaderElector == null) {
-         leaderElector = new LeaderElector(this, contextKey, electionContexts);
-         leaderElectors.put(replica.getName(), leaderElector);
+        ContextKey contextKey = new ContextKey(collection, replica.getName());
+        leaderElector = new LeaderElector(this, contextKey);
+        LeaderElector oldElector = leaderElectors.put(replica.getName(), leaderElector);
+        IOUtils.closeQuietly(oldElector);
+      } else {
+        leaderElector.cancel();
       }
     }
 
     ElectionContext context = new ShardLeaderElectionContext(leaderElector, shardId,
         collection, cd.getName(), replica, this, cc);
 
-    prevContext = electionContexts.put(contextKey, context);
-    if (prevContext != null) {
-      prevContext.close();
-    }
 
     leaderElector.setup(context);
     log.info("Joining election ...");
-    return leaderElector.joinElection(context, false, joinAtHead);
+    leaderElector.joinElection( false, joinAtHead);
   }
 
 
   /**
    * Returns whether or not a recovery was started
    */
-  private boolean checkRecovery(boolean recoverReloadedCores, final boolean isLeader, boolean skipRecovery,
+  private boolean checkRecovery(final boolean isLeader,
                                 final String collection, String coreZkNodeName, String shardId,
-                                SolrCore core, CoreContainer cc, boolean afterExpiration) {
-    if (SKIP_AUTO_RECOVERY) {
-      log.warn("Skipping recovery according to sys prop solrcloud.skip.autorecovery");
-      return false;
-    }
+                                SolrCore core, CoreContainer cc) {
     boolean doRecovery = true;
     if (!isLeader) {
 
-      if (skipRecovery || (!afterExpiration && core.isReloaded() && !recoverReloadedCores)) {
-        doRecovery = false;
-      }
-
-      if (doRecovery) {
+      if (doRecovery && !core.getUpdateHandler().getSolrCoreState().isRecoverying()) {
         if (log.isInfoEnabled()) {
           log.info("Core needs to recover:{}", core.getName());
         }
@@ -1748,6 +1711,9 @@ public class ZkController implements Closeable, Runnable {
         }
         core.getUpdateHandler().getSolrCoreState().doRecovery(cc, core.getCoreDescriptor());
         return true;
+      } else {
+        log.info("Leaders term did not force us into recovery");
+
       }
     } else {
       log.info("I am the leader, no recovery necessary");
@@ -1762,27 +1728,27 @@ public class ZkController implements Closeable, Runnable {
   }
 
   public void publish(final CoreDescriptor cd, final Replica.State state) throws Exception {
-    publish(cd, state, true, false);
+    publish(cd, state, true);
   }
 
   /**
    * Publish core state to overseer.
    */
-  public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState, boolean forcePublish) throws Exception {
+  public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState) throws Exception {
     MDCLoggingContext.setCoreDescriptor(cc, cd);
-
+    log.info("publishing state={}", state);
     try (SolrCore core = cc.getCore(cd.getName())) {
       if ((state == Replica.State.ACTIVE || state == Replica.State.RECOVERING) && (isClosed() || (core != null && core.isClosing()))) {
+        log.info("already closed, won't publish state={}", state);
         throw new AlreadyClosedException();
       }
     }
 
-    // nocommit TODO if we publish anything but ACTIVE, cancel any possible election
+    // nocommit TODO if we publish anything but ACTIVE, cancel any possible election?
 
     try {
       String collection = cd.getCloudDescriptor().getCollectionName();
 
-      log.info("publishing state={}", state);
       // System.out.println(Thread.currentThread().getStackTrace()[3]);
       Integer numShards = cd.getCloudDescriptor().getNumShards();
       if (numShards == null) { // XXX sys prop hack
@@ -1790,8 +1756,6 @@ public class ZkController implements Closeable, Runnable {
         numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
       }
 
-      assert collection != null && collection.length() > 0;
-
       String shardId = cd.getCloudDescriptor().getShardId();
 
       Map<String,Object> props = new HashMap<>();
@@ -1832,7 +1796,11 @@ public class ZkController implements Closeable, Runnable {
       if (state == Replica.State.RECOVERING && cd.getCloudDescriptor().getReplicaType() != Type.PULL) {
         // state is used by client, state of replica can change from RECOVERING to DOWN without needed to finish recovery
         // by calling this we will know that a replica actually finished recovery or not
-        getShardTerms(collection, shardId).startRecovering(cd.getName());
+        ZkShardTerms shardTerms = getShardTermsOrNull(collection, shardId);
+        if (shardTerms == null) {
+          throw new AlreadyClosedException();
+        }
+        shardTerms.startRecovering(cd.getName());
       }
       if (state == Replica.State.ACTIVE && cd.getCloudDescriptor().getReplicaType() != Type.PULL) {
         getShardTerms(collection, shardId).doneRecovering(cd.getName());
@@ -1857,16 +1825,47 @@ public class ZkController implements Closeable, Runnable {
     return getCollectionTerms(collection).getShard(shardId);
   }
 
+  public ZkShardTerms getShardTermsOrNull(String collection, String shardId) {
+    ZkCollectionTerms ct = getCollectionTerms(collection);
+    if (ct == null) return null;
+    return ct.getShardOrNull(shardId);
+  }
+
+
+  public void removeCollectionTerms(String collection) {
+    collectionToTermsLock.lock();
+    try {
+      ZkCollectionTerms collectionTerms = collectionToTerms.remove(collection);
+      IOUtils.closeQuietly(collectionTerms);
+    } finally {
+      collectionToTermsLock.unlock();
+    }
+  }
+
   private ZkCollectionTerms getCollectionTerms(String collection) {
     collectionToTermsLock.lock();
     try {
-      if (!collectionToTerms.containsKey(collection)) collectionToTerms.put(collection, new ZkCollectionTerms(collection, zkClient));
       return collectionToTerms.get(collection);
     } finally {
       collectionToTermsLock.unlock();
     }
   }
 
+  private ZkCollectionTerms createCollectionTerms(String collection) {
+    collectionToTermsLock.lock();
+    try {
+      ZkCollectionTerms ct = collectionToTerms.get(collection);
+      if (ct != null) {
+        return ct;
+      }
+      ct = new ZkCollectionTerms(collection, zkClient);
+      IOUtils.closeQuietly(collectionToTerms.put(collection, ct));
+      return ct;
+    } finally {
+      collectionToTermsLock.unlock();
+    }
+  }
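
createCollectionTerms above implements get-or-create under an explicit lock and quietly closes any instance it displaces. When displacement is not needed, the same contract falls out of computeIfAbsent; a minimal sketch, with Terms as a hypothetical stand-in for ZkCollectionTerms:

    import java.util.concurrent.ConcurrentHashMap;

    final class TermsRegistry {
      // Hypothetical stand-in for ZkCollectionTerms.
      static final class Terms implements AutoCloseable {
        final String collection;
        Terms(String collection) { this.collection = collection; }
        @Override public void close() {}
      }

      private final ConcurrentHashMap<String, Terms> byCollection = new ConcurrentHashMap<>();

      // computeIfAbsent runs the factory at most once per key, so no
      // displaced instance ever has to be closed after a racing put.
      Terms getOrCreate(String collection) {
        return byCollection.computeIfAbsent(collection, Terms::new);
      }

      void remove(String collection) { // removeCollectionTerms analogue
        Terms t = byCollection.remove(collection);
        if (t != null) t.close();
      }
    }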
+
   public void clearZkCollectionTerms() {
     collectionToTermsLock.lock();
     try {
@@ -1877,44 +1876,39 @@ public class ZkController implements Closeable, Runnable {
     }
   }
 
-  public void unregister(String coreName, CoreDescriptor cd) throws Exception {
-    unregister(coreName, cd, true);
-  }
-
-  public void unregister(String coreName, CoreDescriptor cd, boolean removeCoreFromZk) throws Exception {
+  public void unregister(String coreName, CoreDescriptor cd) {
     log.info("Unregister core from zookeeper {}", coreName);
-
-    if (statePublisher != null) {
-      statePublisher.clearStatCache(coreName);
-    }
-
-    if (!zkClient.isConnected()) return;
     final String collection = cd.getCloudDescriptor().getCollectionName();
+    try {
+      collectionToTermsLock.lock();
+      try {
+        ZkCollectionTerms ct = collectionToTerms.get(collection);
+        if (ct != null) {
+          ct.remove(cd.getCloudDescriptor().getShardId(), cd);
+          if (ct.cleanUp()) IOUtils.closeQuietly(collectionToTerms.remove(collection));
+        }
 
-    ZkCollectionTerms ct = collectionToTerms.get(collection);
-    if (ct != null) {
-      ct.remove(cd.getCloudDescriptor().getShardId(), cd);
-    }
+      } finally {
+        collectionToTermsLock.unlock();
+      }
 
-    zkStateReader.unregisterCore(collection);
+      replicasMetTragicEvent.remove(collection + ":" + coreName);
 
-    replicasMetTragicEvent.remove(collection + ":" + coreName);
+      if (statePublisher != null) {
+        statePublisher.clearStatCache(coreName);
+      }
 
-    if (Strings.isNullOrEmpty(collection)) {
-      log.error("No collection was specified.");
-      assert false : "No collection was specified [" + collection + "]";
-      return;
+    } finally {
+      zkStateReader.unregisterCore(collection);
     }
-    final DocCollection docCollection = zkStateReader.getClusterState().getCollectionOrNull(collection);
-    Replica replica = (docCollection == null) ? null : docCollection.getReplica(coreName);
-
-    if (replica == null || replica.getType() != Type.PULL) {
-      ElectionContext context = electionContexts.remove(new ContextKey(collection, coreName));
+//    if (Strings.isNullOrEmpty(collection)) {
+//      log.error("No collection was specified.");
+//      assert false : "No collection was specified [" + collection + "]";
+//      return;
+//    }
+//    final DocCollection docCollection = zkStateReader.getClusterState().getCollectionOrNull(collection);
+//    Replica replica = (docCollection == null) ? null : docCollection.getReplica(coreName);
 
-      if (context != null) {
-        context.close();
-      }
-    }
   }
 
   public ZkStateReader getZkStateReader() {
@@ -2108,40 +2102,14 @@ public class ZkController implements Closeable, Runnable {
       closeAndDone = overseer.isCloseAndDone();
     } catch (NullPointerException e) {
       // okay
+      closeAndDone = true;
     }
-
-    if (overseerElector == null || isClosed() || shudownCalled || closeAndDone) {
+    if (overseerElector == null || isClosed() || shudownCalled || closeAndDone || overseerElector.getContext() == null) {
       return;
     }
     try {
-      String electionNode = overseerElector.getContext().electionPath;
-      if (electionNode != null) {
-        // Check whether we came to this node by mistake
-        if ( overseerElector.getContext() != null && overseerElector.getContext().leaderSeqPath == null 
-            && !overseerElector.getContext().leaderSeqPath.endsWith(electionNode)) {
-          log.warn("Asked to rejoin with wrong election node : {}, current node is {}", electionNode, overseerElector.getContext().leaderSeqPath);
-          //however delete it . This is possible when the last attempt at deleting the election node failed.
-          if (electionNode.startsWith(getNodeName())) {
-            try {
-              zkClient.delete(Overseer.OVERSEER_ELECT + LeaderElector.ELECTION_NODE + "/" + electionNode, -1);
-            } catch (NoNodeException e) {
-              //no problem
-            } catch (InterruptedException e) {
-              ParWork.propagateInterrupt(e);
-              return;
-            } catch (Exception e) {
-              log.warn("Old election node exists , could not be removed ", e);
-            }
-          }
-        } else { // We're in the right place, now attempt to rejoin
-          overseerElector.retryElection(new OverseerElectionContext(getNodeName(),
-              zkClient, overseer), joinAtHead);
-          return;
-        }
-      } else {
-        overseerElector.retryElection(new OverseerElectionContext(getNodeName(),
-               zkClient, overseer), joinAtHead);
-      }
+      overseerElector.retryElection(joinAtHead);
     } catch (Exception e) {
       ParWork.propagateInterrupt(e);
       throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to rejoin election", e);
@@ -2163,23 +2131,21 @@ public class ZkController implements Closeable, Runnable {
 
       ContextKey contextKey = new ContextKey(collectionName, coreName);
 
-      ElectionContext prevContext = electionContexts.get(contextKey);
-      if (prevContext != null) prevContext.close();
 
       Map<String, Object> props = new HashMap<>();
       props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
 
       Replica replica = new Replica(coreName, props, collectionName, shardId, zkStateReader);
 
-      LeaderElector elect = ((ShardLeaderElectionContext) prevContext).getLeaderElector();
+      LeaderElector elect = leaderElectors.get(replica.getName());
+
       ShardLeaderElectionContext context = new ShardLeaderElectionContext(elect, shardId, collectionName,
           coreName, replica, this, getCoreContainer());
 
       context.leaderSeqPath = context.electionPath + LeaderElector.ELECTION_NODE + "/" + electionNode;
       elect.setup(context);
-      prevContext = electionContexts.put(contextKey, context);
-      if (prevContext != null) prevContext.close();
-      elect.retryElection(context, params.getBool(REJOIN_AT_HEAD_PROP, false));
+
+      elect.retryElection(params.getBool(REJOIN_AT_HEAD_PROP, false));
     } catch (Exception e) {
       ParWork.propagateInterrupt(e);
       throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to rejoin election", e);
@@ -2263,8 +2229,7 @@ public class ZkController implements Closeable, Runnable {
                                                          String resourceName, byte[] content,
                                                          boolean createIfNotExists) {
     int latestVersion = znodeVersion;
-    final ZkController zkController = zkLoader.getZkController();
-    final SolrZkClient zkClient = zkController.getZkClient();
+    final SolrZkClient zkClient = zkLoader.getZkClient();
     final String resourceLocation = zkLoader.getConfigSetZkPath() + "/" + resourceName;
     String errMsg = "Failed to persist resource at {0} - old {1}";
     try {
@@ -2324,7 +2289,7 @@ public class ZkController implements Closeable, Runnable {
   }
 
   public static void touchConfDir(ZkSolrResourceLoader zkLoader) {
-    SolrZkClient zkClient = zkLoader.getZkController().getZkClient();
+    SolrZkClient zkClient = zkLoader.getZkClient();
     try {
       zkClient.setData(zkLoader.getConfigSetZkPath(), new byte[]{0}, true);
     } catch (Exception e) {
@@ -2399,7 +2364,7 @@ public class ZkController implements Closeable, Runnable {
     return confDirListeners;
   }
 
-  private final Map<String, Set<Runnable>> confDirectoryListeners = new HashMap<>();
+  private final Map<String, Set<Runnable>> confDirectoryListeners = new ConcurrentHashMap<>();
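
Moving confDirectoryListeners to a ConcurrentHashMap lets the watcher and reconnect paths iterate it without holding a monitor; iteration is then weakly consistent rather than a locked snapshot. A small sketch of that listener-map shape with concurrent value sets (illustrative wiring, not the field's actual callers):

    import java.util.Map;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    final class ConfDirListeners {
      // Lock-free listener registry keyed by ZK config directory.
      private final Map<String, Set<Runnable>> byDir = new ConcurrentHashMap<>();

      void add(String zkDir, Runnable listener) {
        // newKeySet() yields a concurrent Set, safe to iterate while mutated
        byDir.computeIfAbsent(zkDir, d -> ConcurrentHashMap.newKeySet()).add(listener);
      }

      void fire(String zkDir) {
        Set<Runnable> listeners = byDir.get(zkDir);
        if (listeners != null) {
          listeners.forEach(Runnable::run); // weakly consistent iteration
        }
      }
    }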
 
   private class WatcherImpl implements Watcher {
@@ -2415,7 +2380,9 @@ public class ZkController implements Closeable, Runnable {
       if (Event.EventType.None.equals(event.getType()) || isClosed() || cc.isShutDown()) {
         return;
       }
-
+      if (isClosed() || getCoreContainer().isShutDown() || isDcCalled()) {
+        return;
+      }
       Stat stat = null;
       try {
         stat = zkClient.exists(zkDir, null);
@@ -2443,32 +2410,33 @@ public class ZkController implements Closeable, Runnable {
   }
 
   private boolean fireEventListeners(String zkDir) {
-    if (isClosed || cc.isShutDown()) {
+    if (cc.isShutDown()) {
       return false;
     }
-    synchronized (confDirectoryListeners) {
-      // if this is not among directories to be watched then don't set the watcher anymore
-      if (!confDirectoryListeners.containsKey(zkDir)) {
-        log.debug("Watcher on {} is removed ", zkDir);
-        return false;
-      }
+
+    // if this is not among directories to be watched then don't set the watcher anymore
+    if (!confDirectoryListeners.containsKey(zkDir)) {
+      log.debug("Watcher on {} is removed ", zkDir);
+      return false;
     }
+
     final Set<Runnable> listeners = confDirectoryListeners.get(zkDir);
     if (listeners != null) {
 
       // run these in a separate thread because this can be long running
-
-      try (ParWork worker = new ParWork(this, true)) {
-        listeners
-            .forEach((it) -> worker.collect("confDirectoryListener", () -> {
-              it.run();
-            }));
+      if (cc.isShutDown() || isDcCalled()) {
+        return false;
       }
+      listeners.forEach(Runnable::run);
+
     }
     return true;
   }
 
   private void setConfWatcher(String zkDir, Watcher watcher, Stat stat) {
+    if (isClosed() || isDcCalled() || getCoreContainer().isShutDown()) {
+      return;
+    }
     try {
       Stat newStat = zkClient.exists(zkDir, watcher);
       if (stat != null && newStat.getVersion() > stat.getVersion()) {
@@ -2487,12 +2455,18 @@ public class ZkController implements Closeable, Runnable {
   }
 
   public OnReconnect getConfigDirListener() {
-    return () -> {
-      synchronized (confDirectoryListeners) {
-        for (String s : confDirectoryListeners.keySet()) {
-          setConfWatcher(s, new WatcherImpl(s), null);
-          fireEventListeners(s);
-        }
+    return new OnReconnect() {
+      @Override
+      public void command() throws SessionExpiredException {
+        confDirectoryListeners.forEach((s, runnables) -> {
+          setConfWatcher(s, new WatcherImpl(s), null);
+          fireEventListeners(s);
+        });
+      }
+
+      @Override
+      public String getName() {
+        return null;
       }
     };
   }
@@ -2563,14 +2537,9 @@ public class ZkController implements Closeable, Runnable {
   public boolean checkIfCoreNodeNameAlreadyExists(CoreDescriptor dcore) {
     DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(dcore.getCollectionName());
     if (collection != null) {
-      Collection<Slice> slices = collection.getSlices();
-
-      for (Slice slice : slices) {
-        Collection<Replica> replicas = slice.getReplicas();
-        Replica r = slice.getReplica(dcore.getName());
-        if (r != null) {
-          return true;
-        }
+      Replica r = collection.getReplica(dcore.getName());
+      if (r != null) {
+        return true;
       }
     }
     return false;
@@ -2630,9 +2599,10 @@ public class ZkController implements Closeable, Runnable {
    * Ensures that a searcher is registered for the given core and if not, waits until one is registered
    */
   private static void ensureRegisteredSearcher(SolrCore core) throws InterruptedException {
-    if (core.isClosed() || core.getCoreContainer().isShutDown()) {
+    if (core.getCoreContainer().isShutDown()) {
       return;
     }
+    log.info("ensureRegisteredSearcher");
     if (!core.getSolrConfig().useColdSearcher) {
       RefCounted<SolrIndexSearcher> registeredSearcher = core.getRegisteredSearcher();
       if (registeredSearcher != null) {
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java
index 70a61e5..0b07a52 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java
@@ -16,15 +16,13 @@
  */
 package org.apache.solr.cloud;
 
-import com.codahale.metrics.Timer;
 import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.common.cloud.ConnectionManager.IsClosed;
 import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.util.Pair;
-import org.apache.solr.common.util.Utils;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.Op;
+import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
@@ -38,13 +36,8 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.NoSuchElementException;
 import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.locks.Condition;
-import java.util.concurrent.locks.ReentrantLock;
-import java.util.function.Predicate;
+import java.util.concurrent.CountDownLatch;
 
 /**
  * <p>A ZK-based distributed queue. Optimized for single-consumer,
@@ -80,37 +73,8 @@ public class ZkDistributedQueue implements DistributedQueue {
   final SolrZkClient zookeeper;
 
   final Stats stats;
-
-  /**
-   * A lock that guards all of the mutable state that follows.
-   */
-  private final ReentrantLock updateLock = new ReentrantLock();
-
-  /**
-   * Contains the last set of children fetched from ZK. Elements are removed from the head of
-   * this in-memory set as they are consumed from the queue.  Due to the distributed nature
-   * of the queue, elements may appear in this set whose underlying nodes have been consumed in ZK.
-   * Therefore, methods like {@link #peek()} have to double-check actual node existence, and methods
-   * like {@link #poll()} must resolve any races by attempting to delete the underlying node.
-   */
-  private TreeSet<String> knownChildren = new TreeSet<>();
-
-  /**
-   * Used to wait on ZK changes to the child list; you must hold {@link #updateLock} before waiting on this condition.
-   */
-  private final Condition changed = updateLock.newCondition();
-
-  private boolean isDirty = true;
-
-  private int watcherCount = 0;
-
   private final int maxQueueSize;
 
-  /**
-   * If {@link #maxQueueSize} is set, the number of items we can queue without rechecking the server.
-   */
-  private final AtomicInteger offerPermits = new AtomicInteger(0);
-
   public ZkDistributedQueue(SolrZkClient zookeeper, String dir) {
     this(zookeeper, dir, new Stats());
   }
@@ -131,102 +95,6 @@ public class ZkDistributedQueue implements DistributedQueue {
     this.maxQueueSize = maxQueueSize;
   }
 
-  /**
-   * Returns the data at the first element of the queue, or null if the queue is
-   * empty.
-   *
-   * @return data at the first element of the queue, or null.
-   */
-  @Override
-  public byte[] peek() throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Returns the data at the first element of the queue, or null if the queue is
-   * empty and block is false.
-   *
-   * @param block if true, blocks until an element enters the queue
-   * @return data at the first element of the queue, or null.
-   */
-  @Override
-  public byte[] peek(boolean block) throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Returns the data at the first element of the queue, or null if the queue is
-   * empty after wait ms.
-   *
-   * @param wait max wait time in ms.
-   * @return data at the first element of the queue, or null.
-   */
-  @Override
-  public byte[] peek(long wait) throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Attempts to remove the head of the queue and return it. Returns null if the
-   * queue is empty.
-   *
-   * @return Head of the queue or null.
-   */
-  @Override
-  public byte[] poll() throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Attempts to remove the head of the queue and return it.
-   *
-   * @return The former head of the queue
-   */
-  @Override
-  public byte[] remove() throws NoSuchElementException, KeeperException, InterruptedException {
-    Timer.Context time = stats.time(dir + "_remove");
-    try {
-      byte[] result = removeFirst();
-      if (result == null) {
-        throw new NoSuchElementException();
-      }
-      return result;
-    } finally {
-      time.stop();
-    }
-  }
-
-//  public void remove(Collection<String> paths) throws KeeperException, InterruptedException {
-//
-//    if (log.isDebugEnabled()) log.debug("Remove paths from queue dir={} paths={}", dir, paths);
-//
-//    if (paths.isEmpty()) {
-//      if (log.isDebugEnabled()) log.debug("paths is empty, return");
-//      return;
-//    }
-//
-//    List<String> fullPaths = new ArrayList<>(paths.size());
-//    for (String node : paths) {
-//      fullPaths.add(dir + "/" + node);
-//    }
-//
-//
-//    if (log.isDebugEnabled()) log.debug("delete nodes {}", fullPaths);
-//    zookeeper.delete(fullPaths, false);
-//    if (log.isDebugEnabled()) log.debug("after delete nodes");
-//
-//    int cacheSizeBefore = knownChildren.size();
-//    knownChildren.removeAll(paths);
-//    if (cacheSizeBefore - paths.size() == knownChildren.size() && knownChildren.size() != 0) {
-//      stats.setQueueLength(knownChildren.size());
-//    } else {
-//      // There are elements get deleted but not present in the cache,
-//      // the cache seems not valid anymore
-//      knownChildren.clear();
-//      isDirty = true;
-//    }
-//  }
-
   public void remove(Collection<String> paths) throws KeeperException, InterruptedException {
     if (paths.isEmpty()) return;
     List<Op> ops = new ArrayList<>();
@@ -252,27 +120,6 @@ public class ZkDistributedQueue implements DistributedQueue {
         }
       }
     }
-
-    int cacheSizeBefore = knownChildren.size();
-    knownChildren.removeAll(paths);
-    if (cacheSizeBefore - paths.size() == knownChildren.size() && knownChildren.size() != 0) {
-      stats.setQueueLength(knownChildren.size());
-    } else {
-      // There are elements get deleted but not present in the cache,
-      // the cache seems not valid anymore
-      knownChildren.clear();
-      isDirty = true;
-    }
-  }
-
-  /**
-   * Removes the head of the queue and returns it, blocks until it succeeds.
-   *
-   * @return The former head of the queue
-   */
-  @Override
-  public byte[] take() throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
   }
 
   private static Set<String> OPERATIONS = new HashSet<>();
@@ -289,6 +136,40 @@ public class ZkDistributedQueue implements DistributedQueue {
    */
   @Override
   public void offer(byte[] data) throws KeeperException, InterruptedException {
+    CountDownLatch latch = new CountDownLatch(1);
+    Stat stat = zookeeper.exists(dir, new Watcher() {
+      @Override
+      public void process(WatchedEvent event) {
+        if (event.getType() == Watcher.Event.EventType.NodeChildrenChanged) {
+          try {
+            Stat stat = zookeeper.exists(dir, this);
+            if (stat.getNumChildren() <= 15) {
+              latch.countDown();
+              try {
+                zookeeper.getSolrZooKeeper().removeWatches(dir, this, Watcher.WatcherType.Any, true);
+              } catch (Exception e) {
+                log.info("could not remove watch {} {}", e.getClass().getSimpleName(), e.getMessage());
+              }
+            }
+          } catch (Exception e) {
+            latch.countDown();
+            log.error("", e);
+          }
+          return;
+        }
+        try {
+          zookeeper.exists(dir, this);
+        } catch (Exception e) {
+          latch.countDown();
+          log.error("", e);
+        }
+      }
+    });
+
+    if (stat.getNumChildren() > 15) {
+      latch.await();
+    }
+
     // TODO - if too many items on the queue, just block
     zookeeper.create(dir + "/" + PREFIX, data, CreateMode.PERSISTENT_SEQUENTIAL, true);
     return;
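
One caveat on the watcher above: in stock ZooKeeper, NodeChildrenChanged events are only delivered to watches set via getChildren(); exists() registers a data watch. A standalone version of the same back-pressure idea is therefore easier to state with a child watch. A minimal sketch, assuming zk is a connected ZooKeeper client and 15 is the same soft limit used above:

    import java.util.concurrent.CountDownLatch;
    import org.apache.zookeeper.WatchedEvent;
    import org.apache.zookeeper.Watcher;
    import org.apache.zookeeper.ZooKeeper;

    // Sketch: block the producer until the queue znode has room again.
    static void awaitRoom(ZooKeeper zk, String dir, int maxChildren) throws Exception {
      CountDownLatch latch = new CountDownLatch(1);
      Watcher watcher = new Watcher() {
        @Override public void process(WatchedEvent event) {
          if (latch.getCount() == 0) return; // already released; stop re-arming
          try {
            // re-arm the child watch and re-check the size on every event
            if (zk.getChildren(dir, this).size() <= maxChildren) {
              latch.countDown();
            }
          } catch (Exception e) {
            latch.countDown(); // fail open rather than block the producer forever
          }
        }
      };
      if (zk.getChildren(dir, watcher).size() <= maxChildren) {
        return; // enough room already; the leftover watch fires at most once
      }
      latch.await();
    }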
@@ -298,6 +179,10 @@ public class ZkDistributedQueue implements DistributedQueue {
     return stats;
   }
 
+  public SolrZkClient getZookeeper() {
+    return zookeeper;
+  }
+
   @Override
   public Map<String, Object> getStats() {
     if (stats == null) {
@@ -322,58 +207,4 @@ public class ZkDistributedQueue implements DistributedQueue {
     });
     return res;
   }
-
-  /**
-   * Returns the name if the first known child node, or {@code null} if the queue is empty.
-   * This is the only place {@link #knownChildren} is ever updated!
-   * The caller must double check that the actual node still exists, since the in-memory
-   * list is inherently stale.
-   */
-  private String firstChild(boolean remove, boolean refetchIfDirty) throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Return the current set of children from ZK; does not change internal state.
-   */
-  TreeSet<String> fetchZkChildren(Watcher watcher) throws InterruptedException, KeeperException {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Return the currently-known set of elements, using child names from memory. If no children are found, or no
-   * children pass {@code acceptFilter}, waits up to {@code waitMillis} for at least one child to become available.
-   * <p>
-   * Package-private to support {@link OverseerTaskQueue} specifically.</p>
-   */
-  @Override
-  public Collection<Pair<String, byte[]>> peekElements(int max, long waitMillis, Predicate<String> acceptFilter) throws KeeperException, InterruptedException {
-    throw new UnsupportedOperationException();
-  }
-
-  private byte[] removeFirst() throws KeeperException, InterruptedException {
-    while (true) {
-      String firstChild = firstChild(true, false);
-      if (firstChild == null) {
-        return null;
-      }
-      try {
-        String path = dir + "/" + firstChild;
-        byte[] result = zookeeper.getData(path, null, null, true);
-        zookeeper.delete(path, -1, true);
-        stats.setQueueLength(knownChildren.size());
-        return result;
-      } catch (KeeperException.NoNodeException e) {
-        // Another client deleted the node first, remove the in-memory and retry.
-        updateLock.lockInterruptibly();
-        try {
-          // Efficient only for single-consumer
-          knownChildren.clear();
-          isDirty = true;
-        } finally {
-          updateLock.unlock();
-        }
-      }
-    }
-  }
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
index 618c3ce..872d639 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
@@ -22,9 +22,9 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.solr.client.solrj.cloud.ShardTerms;
 import org.apache.solr.common.AlreadyClosedException;
@@ -75,6 +75,7 @@ public class ZkShardTerms implements AutoCloseable{
   private final AtomicBoolean isClosed = new AtomicBoolean(false);
 
   private final AtomicReference<ShardTerms> terms = new AtomicReference<>();
+  private final ReentrantLock termsLock = new ReentrantLock(true);
 
   /**
    * Listener of a core for shard's term change events
@@ -93,6 +94,8 @@ public class ZkShardTerms implements AutoCloseable{
     * @return true if the listener wants to be triggered the next time
      */
     boolean onTermChanged(ShardTerms terms);
+
+    void close();
   }
 
   public ZkShardTerms(String collection, String shard, SolrZkClient zkClient) {
@@ -116,7 +119,7 @@ public class ZkShardTerms implements AutoCloseable{
 
     ShardTerms newTerms;
     while( (newTerms = terms.get().increaseTerms(leader, replicasNeedingRecovery)) != null) {
-      if (forceSaveTerms(newTerms)) return;
+      if (forceSaveTerms(newTerms) || isClosed.get()) return;
     }
   }
 
@@ -138,6 +141,8 @@ public class ZkShardTerms implements AutoCloseable{
    * @return true if this replica has term equals to leader's term, false if otherwise
    */
   public boolean skipSendingUpdatesTo(String coreNodeName) {
+    if (log.isDebugEnabled()) log.debug("skipSendingUpdatesTo {} {}", coreNodeName, terms);
+
     return !terms.get().haveHighestTermValue(coreNodeName);
   }
 
@@ -147,15 +152,20 @@ public class ZkShardTerms implements AutoCloseable{
    * @return true if this replica registered its term, false if otherwise
    */
   public boolean registered(String coreNodeName) {
-    return terms.get().getTerm(coreNodeName) != null;
+    ShardTerms t = terms.get();
+    if (t == null) {
+      return false;
+    }
+    return t.getTerm(coreNodeName) != null;
   }
 
   public void close() {
     // no watcher will be registered
     isClosed.set(true);
 
+    ParWork.close(listeners);
     listeners.clear();
-
+    terms.set(null);
     assert ObjectReleaseTracker.release(this);
   }
 
@@ -168,9 +178,6 @@ public class ZkShardTerms implements AutoCloseable{
    * Add a listener so the next time the shard's term get updated, listeners will be called
    */
   void addListener(CoreTermWatcher listener) {
-    if (isClosed.get()) {
-      throw new AlreadyClosedException();
-    }
     listeners.add(listener);
   }
 
@@ -184,7 +191,7 @@ public class ZkShardTerms implements AutoCloseable{
     listeners.removeIf(coreTermWatcher -> !coreTermWatcher.onTermChanged(terms.get()));
     numListeners = listeners.size();
 
-    return removeTerm(cd.getName()) || numListeners == 0;
+    return removeTerm(cd.getName());
   }
 
   // package private for testing, only used by tests
@@ -199,7 +206,7 @@ public class ZkShardTerms implements AutoCloseable{
         return true;
       }
       tries++;
-      if (tries > 30) {
+      if (tries > 60 || isClosed.get()) {
         log.warn("Could not save terms to zk within " + tries + " tries");
         return true;
       }
@@ -215,7 +222,7 @@ public class ZkShardTerms implements AutoCloseable{
   void registerTerm(String coreNodeName) {
     ShardTerms newTerms;
     while ( (newTerms = terms.get().registerTerm(coreNodeName)) != null) {
-      if (forceSaveTerms(newTerms)) break;
+      if (forceSaveTerms(newTerms) || isClosed.get()) break;
     }
   }
 
@@ -227,14 +234,14 @@ public class ZkShardTerms implements AutoCloseable{
   public void setTermEqualsToLeader(String coreNodeName) {
     ShardTerms newTerms;
     while ( (newTerms = terms.get().setTermEqualsToLeader(coreNodeName)) != null) {
-      if (forceSaveTerms(newTerms)) break;
+      if (forceSaveTerms(newTerms) || isClosed.get()) break;
     }
   }
 
   public void setTermToZero(String coreNodeName) {
     ShardTerms newTerms;
     while ( (newTerms = terms.get().setTermToZero(coreNodeName)) != null) {
-      if (forceSaveTerms(newTerms)) break;
+      if (forceSaveTerms(newTerms) || isClosed.get()) break;
     }
   }
 
@@ -244,7 +251,7 @@ public class ZkShardTerms implements AutoCloseable{
   public void startRecovering(String coreNodeName) {
     ShardTerms newTerms;
     while ( (newTerms = terms.get().startRecovering(coreNodeName)) != null) {
-      if (forceSaveTerms(newTerms)) break;
+      if (forceSaveTerms(newTerms) || isClosed.get()) break;
     }
   }
 
@@ -254,7 +261,7 @@ public class ZkShardTerms implements AutoCloseable{
   public void doneRecovering(String coreNodeName) {
     ShardTerms newTerms;
     while ( (newTerms = terms.get().doneRecovering(coreNodeName)) != null) {
-      if (forceSaveTerms(newTerms)) break;
+      if (forceSaveTerms(newTerms) || isClosed.get()) break;
     }
   }
 
@@ -269,7 +276,7 @@ public class ZkShardTerms implements AutoCloseable{
   public void ensureHighestTermsAreNotZero() {
     ShardTerms newTerms;
     while ( (newTerms = terms.get().ensureHighestTermsAreNotZero()) != null) {
-      if (forceSaveTerms(newTerms)) break;
+      if (forceSaveTerms(newTerms) || isClosed.get()) break;
     }
   }
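
    Every mutator above now shares the same bail-out shape: recompute new terms
    from the latest snapshot, attempt the save, and stop looping once the
    instance is closed. A hedged sketch of that idiom in isolation ("compute"
    and "save" are hypothetical stand-ins for the ShardTerms methods and
    forceSaveTerms):

    import java.util.concurrent.atomic.AtomicBoolean;
    import java.util.concurrent.atomic.AtomicReference;
    import java.util.function.Predicate;
    import java.util.function.UnaryOperator;

    final class RetryUntilSavedOrClosed {
      // compute returns null once the current snapshot already reflects the change;
      // save returns true once the new value was persisted.
      static <T> void mutate(AtomicReference<T> ref, AtomicBoolean closed,
                             UnaryOperator<T> compute, Predicate<T> save) {
        T next;
        while ((next = compute.apply(ref.get())) != null) {
          if (save.test(next) || closed.get()) break;
        }
      }
    }
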
 
@@ -335,11 +342,13 @@ public class ZkShardTerms implements AutoCloseable{
     try {
       Stat stat = new Stat();
       byte[] data = zkClient.getData(znodePath, null, stat, true);
-      newTerms = new ShardTerms((Map<String, Long>) Utils.fromJSON(data), stat.getVersion());
+      ConcurrentHashMap<String,Long> values = new ConcurrentHashMap<>((Map<String,Long>) Utils.fromJSON(data));
+      log.info("refresh shard terms to zk version {}", stat.getVersion());
+      newTerms = new ShardTerms(values, stat.getVersion());
     } catch (KeeperException.NoNodeException e) {
-      if (log.isDebugEnabled()) log.debug("No node found for refresh terms", e);
+      log.warn("No node found for shard terms", e);
       // we have likely been deleted
-      return;
+      throw new AlreadyClosedException(e);
     } catch (InterruptedException e) {
       ParWork.propagateInterrupt(e);
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error updating shard term for collection: " + collection, e);
@@ -364,13 +373,13 @@ public class ZkShardTerms implements AutoCloseable{
         return;
       } catch (KeeperException e) {
         log.warn("Failed watching shard term for collection: {}, retrying!", collection, e);
-        try {
-          zkClient.getConnectionManager().waitForConnected(zkClient.getZkClientTimeout());
-        } catch (TimeoutException | InterruptedException te) {
-          if (Thread.interrupted()) {
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error watching shard term for collection: " + collection, te);
-          }
-        }
+//        try {
+//          zkClient.getConnectionManager().waitForConnected(zkClient.getZkClientTimeout());
+//        } catch (TimeoutException | InterruptedException te) {
+//          if (Thread.interrupted()) {
+//            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error watching shard term for collection: " + collection, te);
+//          }
+//        }
       }
     }
   }
@@ -404,18 +413,25 @@ public class ZkShardTerms implements AutoCloseable{
    */
   private void setNewTerms(ShardTerms newTerms) {
     boolean isChanged = false;
-    for (;;)  {
-      ShardTerms terms = this.terms.get();
-      if (terms == null || newTerms.getVersion() > terms.getVersion())  {
-        if (this.terms.compareAndSet(terms, newTerms))  {
-          isChanged = true;
+    termsLock.lock();
+    try {
+      for (;;)  {
+        ShardTerms terms = this.terms.get();
+        if (terms == null || newTerms.getVersion() > terms.getVersion())  {
+          if (this.terms.compareAndSet(terms, newTerms))  {
+            isChanged = true;
+            break;
+          }
+        } else  {
+          break;
+        }
+        if (isClosed.get()) {
           break;
         }
-      } else  {
-        break;
       }
+    } finally {
+      termsLock.unlock();
     }
-
     if (isChanged) onTermUpdates(newTerms);
   }
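
    setNewTerms() above now serializes writers behind a fair ReentrantLock but
    keeps the version guard: a candidate only replaces the cached terms if its
    ZK version is strictly newer. A small sketch of that publish rule, with
    "Versioned" standing in for ShardTerms:

    import java.util.concurrent.atomic.AtomicReference;
    import java.util.concurrent.locks.ReentrantLock;

    final class VersionedRef {
      interface Versioned { int getVersion(); }

      private final AtomicReference<Versioned> ref = new AtomicReference<>();
      private final ReentrantLock lock = new ReentrantLock(true); // fair, as in the patch

      // Returns true if next was newer than the cached value and got published.
      boolean publishIfNewer(Versioned next) {
        lock.lock();
        try {
          for (;;) {
            Versioned cur = ref.get();
            if (cur != null && next.getVersion() <= cur.getVersion()) return false;
            if (ref.compareAndSet(cur, next)) return true;
          }
        } finally {
          lock.unlock();
        }
      }
    }
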
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java b/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
index c672c18..850a33c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
@@ -25,6 +25,7 @@ import java.nio.file.Path;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.solr.common.ParWork;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkConfigManager;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.core.SolrResourceNotFoundException;
@@ -41,10 +42,11 @@ import org.slf4j.LoggerFactory;
 public class ZkSolrResourceLoader extends SolrResourceLoader implements ResourceLoader {
 
   private final String configSetZkPath;
-  private ZkController zkController;
+
   private ZkIndexSchemaReader zkIndexSchemaReader;
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private final SolrZkClient zkClient;
 
   /**
    * <p>
@@ -56,7 +58,7 @@ public class ZkSolrResourceLoader extends SolrResourceLoader implements Resource
   public ZkSolrResourceLoader(Path instanceDir, String configSet, ClassLoader parent,
                               ZkController zooKeeperController) {
     super(instanceDir, parent);
-    this.zkController = zooKeeperController;
+    this.zkClient = zooKeeperController.getZkClient();
     configSetZkPath = ZkConfigManager.CONFIGS_ZKNODE + "/" + configSet;
   }
 
@@ -67,16 +69,16 @@ public class ZkSolrResourceLoader extends SolrResourceLoader implements Resource
    */
   @Override
   public InputStream openResource(String resource) throws IOException {
-    InputStream is;
+
     String file = (".".equals(resource)) ? configSetZkPath : configSetZkPath + "/" + resource;
     if (log.isDebugEnabled()) log.debug("open resource {}", resource);
 
     try {
 
       Stat stat = new Stat();
-      byte[] bytes = zkController.getZkClient().getData(file, null, stat);
+      byte[] bytes = zkClient.getData(file, null, stat);
       if (bytes == null) {
-        log.error("resource not found {}", resource);
+        if (log.isDebugEnabled()) log.debug("resource not found {}", resource);
         throw new SolrResourceNotFoundException("Can't find resource '" + resource
                 + "' in classpath or '" + configSetZkPath + "', cwd="
                 + System.getProperty("user.dir"));
@@ -96,6 +98,10 @@ public class ZkSolrResourceLoader extends SolrResourceLoader implements Resource
     }
   }
 
+  public SolrZkClient getZkClient() {
+    return zkClient;
+  }
+
   public static class ZkByteArrayInputStream extends ByteArrayInputStream{
 
     private final Stat stat;
@@ -113,10 +119,6 @@ public class ZkSolrResourceLoader extends SolrResourceLoader implements Resource
   public String getConfigSetZkPath() {
     return configSetZkPath;
   }
-  
-  public ZkController getZkController() {
-    return zkController;
-  }
 
   public void setZkIndexSchemaReader(ZkIndexSchemaReader zkIndexSchemaReader) {
     this.zkIndexSchemaReader = zkIndexSchemaReader;
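
    With the loader now holding a SolrZkClient directly instead of the whole
    ZkController, openResource() boils down to a single getData call. A hedged
    sketch of that lookup ("configPath" is a hypothetical configset znode;
    error handling is simplified):

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.zookeeper.data.Stat;

    final class ZkResource {
      static InputStream open(SolrZkClient zkClient, String configPath, String resource) throws IOException {
        String file = ".".equals(resource) ? configPath : configPath + "/" + resource;
        try {
          Stat stat = new Stat();
          byte[] bytes = zkClient.getData(file, null, stat, true); // retry on connection loss
          if (bytes == null) throw new IOException("resource not found: " + file);
          return new ByteArrayInputStream(bytes);
        } catch (IOException e) {
          throw e;
        } catch (Exception e) {
          throw new IOException("error reading " + file, e);
        }
      }
    }
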
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java
index 5d36aa5..68a2c59 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/AddReplicaCmd.java
@@ -114,7 +114,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
       ShardRequestTracker shardRequestTracker, @SuppressWarnings({"rawtypes"})NamedList results)
       throws IOException, InterruptedException, KeeperException {
 
-    if (log.isDebugEnabled())  log.debug("addReplica() : {}", Utils.toJSONString(message));
+    log.info("addReplica() : {}", Utils.toJSONString(message));
 
     String extCollectionName = message.getStr(COLLECTION_PROP);
     boolean followAliases = message.getBool(FOLLOW_ALIASES, false);
@@ -205,7 +205,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
           public Response call() {
             if (!onlyUpdateState && createdShardHandler) {
               try {
-                if (log.isDebugEnabled())  log.debug("Processs responses");
+                 log.info("Processs responses");
                 shardRequestTracker.processResponses(results, shardHandler, true, "ADDREPLICA failed to create replica");
               } catch (Exception e) {
                 ParWork.propagateInterrupt(e);
@@ -236,7 +236,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
     }
     try {
       log.info("waiting for created replicas shard={} {}", shard, coreNames);
-      zkStateReader.waitForState(collectionName, 15, TimeUnit.SECONDS, (liveNodes, collectionState) -> { // nocommit timeout
+      zkStateReader.waitForState(collectionName, 30, TimeUnit.SECONDS, (liveNodes, collectionState) -> { // nocommit timeout
         if (collectionState == null) {
           return false;
         }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/AliasCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/AliasCmd.java
index 349ba3d..86a3ba1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/AliasCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/AliasCmd.java
@@ -17,13 +17,12 @@
 
 package org.apache.solr.cloud.api.collections;
 
-import org.apache.solr.client.solrj.impl.BaseCloudSolrClient;
 import org.apache.solr.cloud.Overseer;
+import org.apache.solr.cloud.OverseerSolrResponse;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.CollectionProperties;
 import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.handler.admin.CollectionsHandler;
@@ -34,7 +33,6 @@ import static org.apache.solr.cloud.api.collections.RoutedAlias.ROUTED_ALIAS_NAM
 import static org.apache.solr.common.params.CollectionAdminParams.COLL_CONF;
 import static org.apache.solr.common.params.CommonParams.NAME;
 import java.util.Map;
-import java.util.concurrent.TimeUnit;
 
 /**
  * Common superclass for commands that maintain or manipulate aliases. In the routed alias parlance, "maintain"
@@ -82,7 +80,9 @@ abstract class AliasCmd implements OverseerCollectionMessageHandler.Cmd {
       // Since we are running in the Overseer here, send the message directly to the Overseer CreateCollectionCmd.
       // note: there's doesn't seem to be any point in locking on the collection name, so we don't. We currently should
       //   already have a lock on the alias name which should be sufficient.
-      ocmh.commandMap.get(CollectionParams.CollectionAction.CREATE).call(clusterState, zkProps, results);
+
+
+      CollectionsHandler.sendToOCPQueue(ocmh.overseer.getCoreContainer(), zkProps, 30000);
     } catch (SolrException e) {
       // The collection might already exist, and that's okay -- we can adopt it.
       if (!e.getMessage().contains("collection already exists")) {
@@ -90,9 +90,8 @@ abstract class AliasCmd implements OverseerCollectionMessageHandler.Cmd {
       }
     }
 
-
-    int numShards = BaseCloudSolrClient.getShardNames(zkProps).size();
-    ocmh.zkStateReader.waitForActiveCollection(createCollName, 60, TimeUnit.SECONDS, numShards, numShards * BaseCloudSolrClient.getTotalReplicas(zkProps));
+    CollectionsHandler.waitForActiveCollection(createCollName, ocmh.overseer.getCoreContainer(),
+        new OverseerSolrResponse(results));
     CollectionProperties collectionProperties = new CollectionProperties(ocmh.zkStateReader);
     collectionProperties.setCollectionProperty(createCollName,ROUTED_ALIAS_NAME_CORE_PROP,aliasName);
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java
index 7869424..23b9424 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java
@@ -212,7 +212,7 @@ public class Assign {
       createNodeList = createNodeSet == null ? null : new ArrayList<>(new LinkedHashSet<>(StrUtils.splitSmart((String) createNodeSet, ",", true)));
     }
     String collectionName = collection.getName();
-    HashMap<String, ReplicaCount> nodeNameVsShardCount = getNodeNameVsShardCount(collectionName, clusterState, cloudManager.getClusterStateProvider().getClusterState().getLiveNodes(), createNodeList);
+    HashMap<String, ReplicaCount> nodeNameVsShardCount = getNodeNameVsShardCount(collectionName, clusterState, cloudManager.getClusterStateProvider().getLiveNodes(), createNodeList);
 
 
     AssignRequest assignRequest = new AssignRequestBuilder()
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
index 0c3bf3a..85717f7 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
@@ -61,6 +61,7 @@ import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION;
 import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
 import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
 import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS;
@@ -105,6 +106,36 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
   }
 
   @Override
+  public boolean cleanup(ZkNodeProps message) {
+    final String collectionName = message.getStr(NAME);
+    boolean activeAndLive = false;
+    DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
+    if (collection != null) {
+      Collection<Slice> slices = collection.getSlices();
+      for (Slice slice : slices) {
+
+        if (slice.getLeader() != null && slice.getLeader().isActive(zkStateReader.getLiveNodes())) {
+          activeAndLive = true;
+        }
+      }
+      if (!activeAndLive) {
+        ZkNodeProps m = new ZkNodeProps();
+        try {
+          m.getProperties().put(QUEUE_OPERATION, "delete");
+          m.getProperties().put(NAME, collectionName);
+          ocmh.overseer.getCoreContainer().getZkController().getOverseerCollectionQueue().offer(Utils.toJSON(m), 15000);
+          return false;
+        } catch (KeeperException e) {
+          log.error("", e);
+        } catch (InterruptedException e) {
+          log.error("", e);
+        }
+      }
+    }
+    return true;
+  }
+
+  @Override
   @SuppressWarnings({"unchecked"})
   public AddReplicaCmd.Response call(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results) throws Exception {
     log.info("CreateCollectionCmd {}", message);
@@ -182,7 +213,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       try {
         replicaPositions = buildReplicaPositions(cloudManager, message, shardNames);
       } catch (Exception e) {
-        log.error("", e);
+        log.error("Exception building replica positions", e);
         // unwrap the exception
         throw new SolrException(ErrorCode.BAD_REQUEST, e.getMessage(), e.getCause());
       }
@@ -224,9 +255,10 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
         ZkNodeProps props = new ZkNodeProps();
         //props.getProperties().putAll(message.getProperties());
         ZkNodeProps addReplicaProps = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toString(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP,
-            replicaPosition.shard, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.NODE_NAME_PROP,
-            nodeName, "node", nodeName, ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(), ZkStateReader.NUM_SHARDS_PROP, message.getStr(ZkStateReader.NUM_SHARDS_PROP), "shards",
-            message.getStr("shards"), CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState)); props.getProperties().putAll(addReplicaProps.getProperties());
+            replicaPosition.shard, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.NODE_NAME_PROP, nodeName, "node", nodeName,
+            ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(), ZkStateReader.NUM_SHARDS_PROP, message.getStr(ZkStateReader.NUM_SHARDS_PROP), "shards", message.getStr("shards"),
+            CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
+        props.getProperties().putAll(addReplicaProps.getProperties());
         if (log.isDebugEnabled()) log.debug("Sending state update to populate clusterstate with new replica {}", props);
 
         clusterState = new AddReplicaCmd(ocmh, true).call(clusterState, props, results).clusterState;
@@ -298,93 +330,98 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
     if (log.isDebugEnabled()) log.debug("CreateCollectionCmd clusterstate={}", clusterState);
     AddReplicaCmd.Response response = new AddReplicaCmd.Response();
 
-    if (results.get("failure") == null && results.get("exception") == null) {
-      List<ReplicaPosition> finalReplicaPositions = replicaPositions;
-      response.asyncFinalRunner = new OverseerCollectionMessageHandler.Finalize() {
-        @Override
-        public AddReplicaCmd.Response call() {
-          try {
-            shardRequestTracker.processResponses(results, shardHandler, false, null, Collections.emptySet());
-          } catch (KeeperException e) {
-            log.error("", e);
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-          } catch (InterruptedException e) {
-            ParWork.propagateInterrupt(e);
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-          }
-          //  nocommit - put this in finalizer and finalizer after all calls to allow parallel and forward momentum
+    List<ReplicaPosition> finalReplicaPositions = replicaPositions;
+    response.asyncFinalRunner = new OverseerCollectionMessageHandler.Finalize() {
+      @Override
+      public AddReplicaCmd.Response call() {
+        try {
+          shardRequestTracker.processResponses(results, shardHandler, false, null, Collections.emptySet());
+        } catch (KeeperException e) {
+          log.error("", e);
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+        } catch (InterruptedException e) {
+          ParWork.propagateInterrupt(e);
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+        }
+        //  nocommit - put this in finalizer and finalizer after all calls to allow parallel and forward momentum
 
-          AddReplicaCmd.Response response = new AddReplicaCmd.Response();
+        AddReplicaCmd.Response response = new AddReplicaCmd.Response();
 
-          @SuppressWarnings({"rawtypes"}) boolean failure = results.get("failure") != null && ((SimpleOrderedMap) results.get("failure")).size() > 0;
-          if (failure) {
-            log.error("Failure creating collection {}", results.get("failure"));
-            //        // Let's cleanup as we hit an exception
-            //        // We shouldn't be passing 'results' here for the cleanup as the response would then contain 'success'
-            //        // element, which may be interpreted by the user as a positive ack
-            //        // nocommit review
-            try {
-              response.clusterState = ocmh.cleanupCollection(collectionName, new NamedList<Object>()).clusterState;
-            } catch (Exception e) {
-              log.error("Exception trying to clean up collection after fail {}", collectionName);
+        @SuppressWarnings({"rawtypes"}) boolean failure = results.get("failure") != null && ((SimpleOrderedMap) results.get("failure")).size() > 0;
+        if (failure) {
+          log.error("Failure creating collection {}", results.get("failure"));
+          //        // Let's cleanup as we hit an exception
+          //        // We shouldn't be passing 'results' here for the cleanup as the response would then contain 'success'
+          //        // element, which may be interpreted by the user as a positive ack
+          //        // nocommit review
+          try {
+            AddReplicaCmd.Response rsp = ocmh.cleanupCollection(collectionName, new NamedList<Object>());
+
+            response.clusterState = rsp.clusterState;
+            if (rsp.asyncFinalRunner != null) {
+              rsp.asyncFinalRunner.call();
             }
-            if (log.isDebugEnabled()) log.debug("Cleaned up artifacts for failed create collection for [{}]", collectionName);
-            throw new SolrException(ErrorCode.BAD_REQUEST, "Underlying core creation failed while creating collection: " + collectionName + "\n" + results);
-          } else {
-            Object createNodeSet = message.get(ZkStateReader.CREATE_NODE_SET);
-            if (log.isDebugEnabled()) log.debug("createNodeSet={}", createNodeSet);
-            if (createNodeSet == null || (!createNodeSet.equals("") && !createNodeSet.equals(ZkStateReader.CREATE_NODE_SET_EMPTY))) {
+          } catch (Exception e) {
+            log.error("Exception trying to clean up collection after fail {}", collectionName);
+          }
+          if (log.isDebugEnabled()) log.debug("Cleaned up artifacts for failed create collection for [{}]", collectionName);
+          throw new SolrException(ErrorCode.BAD_REQUEST, "Underlying core creation failed while creating collection: " + collectionName + "\n" + results);
+        } else {
+          Object createNodeSet = message.get(ZkStateReader.CREATE_NODE_SET);
+          if (log.isDebugEnabled()) log.debug("createNodeSet={}", createNodeSet);
+          if (createNodeSet == null || !createNodeSet.equals(ZkStateReader.CREATE_NODE_SET_EMPTY)) {
             try {
-                zkStateReader.waitForState(collectionName, 10, TimeUnit.SECONDS, (l, c) -> {
-                  if (c == null) {
+              zkStateReader.waitForState(collectionName, 30, TimeUnit.SECONDS, (l, c) -> {
+                if (c == null) {
+                  return false;
+                }
+                for (String name : coresToCreate.keySet()) {
+                  if (log.isTraceEnabled()) log.trace("look for core {}", name);
+                  if (c.getReplica(name) == null || c.getReplica(name).getState() != Replica.State.ACTIVE) {
+                    if (log.isTraceEnabled()) log.trace("not the right replica or state {}", c.getReplica(name));
                     return false;
                   }
-                  for (String name : coresToCreate.keySet()) {
-                    log.info("look for core {}", name);
-                    if (c.getReplica(name) == null || c.getReplica(name).getState() != Replica.State.ACTIVE) {
-                      log.info("not the right replica {}", c.getReplica(name));
-                      return false;
-                    }
-                  }
-                  Collection<Slice> slices = c.getSlices();
-                  if (slices.size() < shardNames.size()) {
-                    log.info("wrong number slices {} vs {}", slices.size(), shardNames.size());
+                }
+                Collection<Slice> slices = c.getSlices();
+                if (slices.size() < shardNames.size()) {
+                  if (log.isTraceEnabled()) log.trace("wrong number slices {} vs {}", slices.size(), shardNames.size());
+                  return false;
+                }
+                for (Slice slice : slices) {
+                  if (log.isTraceEnabled()) log.trace("slice {} leader={}", slice, slice.getLeader());
+                  if (slice.getLeader() == null || slice.getLeader().getState() != Replica.State.ACTIVE) {
+                    if (log.isTraceEnabled()) log.trace("no leader found for slice {}", slice.getName());
                     return false;
                   }
-                  for (Slice slice : slices) {
-                    log.info("slice {} leader={}", slice, slice.getLeader());
-                    if (slice.getLeader() == null || slice.getLeader().getState() != Replica.State.ACTIVE) {
-                      log.info("no leader found for slice {}", slice.getName());
-                      return false;
-                    }
-                  }
-                  log.info("return true, everything active");
-                  return true;
-                });
-              } catch(InterruptedException e) {
-                log.warn("Interrupted waiting for active replicas on collection creation {}", collectionName);
-                throw new SolrException(ErrorCode.SERVER_ERROR, e);
-              } catch(TimeoutException e){
-                log.error("Exception waiting for active replicas on collection creation {}", collectionName);
-                throw new SolrException(ErrorCode.SERVER_ERROR, e);
-              }
+                }
+                if (log.isTraceEnabled()) log.trace("return true, everything active");
+                return true;
+              });
+            } catch (InterruptedException e) {
+              log.warn("Interrupted waiting for active replicas on collection creation {}", collectionName);
+              throw new SolrException(ErrorCode.SERVER_ERROR, e);
+            } catch (TimeoutException e) {
+              log.error("Exception waiting for active replicas on collection creation {}", collectionName);
+              throw new SolrException(ErrorCode.SERVER_ERROR, e);
             }
+          }
 
-            if (log.isDebugEnabled()) log.debug("Finished create command on all shards for collection: {}", collectionName);
-
-            // Emit a warning about production use of data driven functionality
-            boolean defaultConfigSetUsed = message.getStr(COLL_CONF) == null || message.getStr(COLL_CONF).equals(ConfigSetsHandlerApi.DEFAULT_CONFIGSET_NAME);
-            if (defaultConfigSetUsed) {
-              results.add("warning", "Using _default configset. Data driven schema functionality" + " is enabled by default, which is NOT RECOMMENDED for production use. To turn it off:"
-                  + " curl http://{host:port}/solr/" + collectionName + "/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'");
-            }
+          if (log.isDebugEnabled()) log.debug("Finished create command on all shards for collection: {}", collectionName);
 
+          // Emit a warning about production use of data driven functionality
+          boolean defaultConfigSetUsed = message.getStr(COLL_CONF) == null || message.getStr(COLL_CONF).equals(ConfigSetsHandlerApi.DEFAULT_CONFIGSET_NAME);
+          if (defaultConfigSetUsed) {
+            results.add("warning",
+                "Using _default configset. Data driven schema functionality" + " is enabled by default, which is NOT RECOMMENDED for production use. To turn it off:" + " curl http://{host:port}/solr/"
+                    + collectionName + "/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'");
           }
 
-          return response;
         }
-      };
-    }
+
+        return response;
+      }
+    };
+
     if (log.isDebugEnabled()) log.debug("return cs from create collection cmd {}", clusterState);
     response.clusterState = clusterState;
     return response;
@@ -538,10 +575,11 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
     for (String shardName : slices.keySet()) {
       try {
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cName + "/" + shardName, null, CreateMode.PERSISTENT, false);
-        // stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cName + "/leader_elect", null, CreateMode.PERSISTENT, false);
+        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cName + "/leader_elect", null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cName + "/leader_elect/" + shardName, null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cName + "/leader_elect/" + shardName + "/election", null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cName + "/leaders/" + shardName, null, CreateMode.PERSISTENT, false);
+        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE  + "/" + cName + "/terms", null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE  + "/" + cName + "/terms/" + shardName, ZkStateReader.emptyJson, CreateMode.PERSISTENT, false);
       } catch (AlreadyExistsException e) {
         // okay
@@ -555,7 +593,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       }
     }
     DocCollection newCollection = new DocCollection(cName,
-            slices, collectionProps, router, 0);
+            slices, collectionProps, router, 0, false);
 
     return newCollection;
   }
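
    The bootstrap above now pre-creates the leader_elect and terms parents
    explicitly, tolerating AlreadyExistsException so concurrent creators are
    harmless. The same create-if-absent idiom against the raw ZooKeeper client,
    as a sketch:

    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.KeeperException;
    import org.apache.zookeeper.ZooDefs;
    import org.apache.zookeeper.ZooKeeper;

    final class ZkBootstrap {
      static void ensurePath(ZooKeeper zk, String path) throws Exception {
        try {
          zk.create(path, null, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } catch (KeeperException.NodeExistsException ok) {
          // another node won the race; nothing to do
        }
      }
    }
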
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java
index 59b08f8..b6412d2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteAliasCmd.java
@@ -38,7 +38,7 @@ public class DeleteAliasCmd implements OverseerCollectionMessageHandler.Cmd {
 
     ZkStateReader zkStateReader = ocmh.zkStateReader;
     zkStateReader.aliasesManager.applyModificationAndExportToZk(a -> a.cloneWithCollectionAlias(aliasName, null));
-    return null;
+    return new AddReplicaCmd.Response();
   }
 
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java
index 9f20a2a..a2f008b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java
@@ -147,17 +147,20 @@ public class DeleteCollectionCmd implements OverseerCollectionMessageHandler.Cmd
 
       shardRequestTracker = new OverseerCollectionMessageHandler.ShardRequestTracker(asyncId, message.getStr("operation"), ocmh.adminPath, zkStateReader, ocmh.shardHandlerFactory, ocmh.overseer);
 
-      @SuppressWarnings({"unchecked"}) List<Replica> failedReplicas = ocmh.collectionCmd(internalMsg, params, results, null, asyncId, okayExceptions, shardHandler, shardRequestTracker);
+      @SuppressWarnings({"unchecked"}) List<Replica> notLifeReplicas = ocmh.collectionCmd(internalMsg, params, results, null, asyncId, okayExceptions, shardHandler, shardRequestTracker);
 
-      if (failedReplicas == null) {
+      if (notLifeReplicas == null) {
         // TODO: handle this in any special way? more logging?
+        log.warn("The following replicas where not live to receive an unload command {}", notLifeReplicas);
       }
 
     } finally {
 
       // make sure it's gone again after cores have been removed
       try {
+        ocmh.overseer.getCoreContainer().getZkController().removeCollectionTerms(collection);
         zkStateReader.getZkClient().clean(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection);
+        ocmh.overseer.getCoreContainer().getZkController().removeCollectionTerms(collection);
       } catch (Exception e) {
         log.error("Exception while trying to remove collection zknode", e);
       }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java
index 4640ac2..3996fe9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteReplicaCmd.java
@@ -180,7 +180,7 @@ public class DeleteReplicaCmd implements Cmd {
           }
 
           try {
-            waitForCoreNodeGone(collectionName, shard, replicaName, 10000); // nocommit timeout
+            waitForCoreNodeGone(collectionName, shard, replicaName, 5000); // nocommit timeout
           } catch (Exception e) {
             log.error("", e);
           }
@@ -360,7 +360,7 @@ public class DeleteReplicaCmd implements Cmd {
       params.set(CoreAdminParams.DELETE_DATA_DIR, message.getBool(CoreAdminParams.DELETE_DATA_DIR, true));
       params.set(CoreAdminParams.DELETE_METRICS_HISTORY, message.getBool(CoreAdminParams.DELETE_METRICS_HISTORY, true));
 
-      isLive = ocmh.zkStateReader.getClusterState().getLiveNodes().contains(replica.getNodeName());
+      isLive = ocmh.zkStateReader.getLiveNodes().contains(replica.getNodeName());
 
 
       if (isLive) {
@@ -390,7 +390,11 @@ public class DeleteReplicaCmd implements Cmd {
         if (c == null)
           return true;
         Slice slice = c.getSlice(shard);
-        if(slice == null || slice.getReplica(replicaName) == null) {
+        if (slice == null) {
+          return true;
+        }
+        Replica r = slice.getReplica(replicaName);
+        if (r == null || !ocmh.zkStateReader.isNodeLive(r.getNodeName())) {
           return true;
         }
         return false;
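
    The widened predicate above treats a replica as gone if the collection or
    slice vanished, the replica is absent, or its node dropped out of live
    nodes. As a standalone CollectionStatePredicate (a sketch; "shard" and
    "replicaName" are hypothetical inputs):

    import org.apache.solr.common.cloud.CollectionStatePredicate;
    import org.apache.solr.common.cloud.Replica;
    import org.apache.solr.common.cloud.Slice;

    final class ReplicaGone {
      static CollectionStatePredicate gone(String shard, String replicaName) {
        return (liveNodes, c) -> {
          if (c == null) return true;                 // collection deleted
          Slice slice = c.getSlice(shard);
          if (slice == null) return true;             // shard deleted
          Replica r = slice.getReplica(replicaName);
          return r == null || !liveNodes.contains(r.getNodeName()); // gone or node dead
        };
      }
    }
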
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java
index adc3df6..b1dbe7b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java
@@ -331,9 +331,10 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd {
       log.info("Asking temp source leader to wait for: {} to be alive on: {}", tempCollectionReplica2, targetLeader.getNodeName());
     }
     cmd = new CoreAdminRequest.WaitForState();
-    cmd.setCoreName(tempSourceLeader.getStr("core"));
+    cmd.setCoreName(tempSourceLeader.getName());
     cmd.setNodeName(targetLeader.getNodeName());
     cmd.setState(Replica.State.ACTIVE);
+    cmd.setShardId(tempSourceLeader.getSlice());
     cmd.setCheckLive(true);
     cmd.setOnlyIfLeader(true);
     params = new ModifiableSolrParams(cmd.getParams());
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
index 8634e6b..9ec8ec1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MoveReplicaCmd.java
@@ -94,8 +94,8 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
     if (coll == null) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
     }
-    if (!clusterState.getLiveNodes().contains(targetNode)) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target node: " + targetNode + " not in live nodes: " + clusterState.getLiveNodes());
+    if (!ocmh.zkStateReader.getLiveNodes().contains(targetNode)) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target node: " + targetNode + " not in live nodes: " + ocmh.zkStateReader.getLiveNodes());
     }
     Replica replica = null;
     if (message.containsKey(REPLICA_PROP)) {
@@ -190,7 +190,7 @@ public class MoveReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
   private void moveHdfsReplica(ClusterState clusterState, @SuppressWarnings({"rawtypes"}) NamedList results, String dataDir, String targetNode, String async, DocCollection coll, Replica replica,
       Slice slice, int timeout, boolean waitForFinalState) throws Exception {
     String skipCreateReplicaInClusterState = "true";
-    if (clusterState.getLiveNodes().contains(replica.getNodeName())) {
+    if (ocmh.zkStateReader.getLiveNodes().contains(replica.getNodeName())) {
       skipCreateReplicaInClusterState = "false";
       ZkNodeProps removeReplicasProps = new ZkNodeProps(COLLECTION_PROP, coll.getName(), SHARD_ID_PROP, slice.getName(), REPLICA_PROP, replica.getName());
       removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
index 2d9eb14..54375f2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java
@@ -39,6 +39,7 @@ import org.apache.solr.cloud.Stats;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.overseer.CollectionMutator;
 import org.apache.solr.cloud.overseer.OverseerAction;
+import org.apache.solr.cloud.overseer.ZkStateWriter;
 import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.ParWork;
 import org.apache.solr.common.SolrCloseable;
@@ -275,19 +276,32 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
 
   @Override
   @SuppressWarnings("unchecked")
-  public OverseerSolrResponse processMessage(ZkNodeProps message, String operation) throws InterruptedException {
+  public OverseerSolrResponse processMessage(ZkNodeProps message, String operation, ZkStateWriter zkWriter) throws InterruptedException {
     MDCLoggingContext.setCollection(message.getStr(COLLECTION));
     MDCLoggingContext.setShard(message.getStr(SHARD_ID_PROP));
     MDCLoggingContext.setReplica(message.getStr(REPLICA_PROP));
     if (log.isDebugEnabled()) log.debug("OverseerCollectionMessageHandler.processMessage : {} , {}", operation, message);
-    ClusterState clusterState = overseer.getZkStateWriter().getClusterstate(false);
-    @SuppressWarnings({"rawtypes"})
-    NamedList results = new NamedList();
-    String collection = message.getStr("collection");
-    if (collection == null) {
-      collection = message.getStr("name");
-    }
+
+    ClusterState clusterState = zkWriter.getClusterstate(false);
+    @SuppressWarnings({"rawtypes"}) NamedList results = new NamedList();
     try {
+      String collection = message.getStr("collection");
+      if (collection == null) {
+        collection = message.getStr("name");
+      }
+
+      if (operation.equals("cleanup")) {
+        log.info("Found item that needs cleanup {}", message);
+        String op = message.getStr(Overseer.QUEUE_OPERATION);
+        CollectionAction action = getCollectionAction(op);
+        Cmd command = commandMap.get(action);
+        boolean drop = command.cleanup(message);
+        if (drop) {
+          return null;
+        }
+        return new OverseerSolrResponse(null);
+      }
+
       CollectionAction action = getCollectionAction(operation);
       Cmd command = commandMap.get(action);
       if (command != null) {
@@ -300,18 +314,20 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
 
         if (responce.clusterState != null) {
           DocCollection docColl = responce.clusterState.getCollectionOrNull(collection);
-          Map<String, DocCollection> collectionStates = null;
+          Map<String,DocCollection> collectionStates = null;
           if (docColl != null) {
             log.info("create new single collection state for collection {}", docColl.getName());
             collectionStates = new HashMap<>();
             collectionStates.put(docColl.getName(), docColl);
           } else {
             log.info("collection not found in returned state {} {}", collection, responce.clusterState);
-            overseer.getZkStateWriter().removeCollection(collection);
+            if (collection != null) {
+              zkWriter.removeCollection(collection);
+            }
           }
           if (collectionStates != null) {
-            ClusterState cs = new ClusterState(responce.clusterState.getLiveNodes(), collectionStates);
-            overseer.getZkStateWriter().enqueueUpdate(cs, null, false);
+            ClusterState cs = ClusterState.getRefCS(collectionStates, -2);
+            zkWriter.enqueueUpdate(cs, null, false);
           }
 
           overseer.writePendingUpdates();
@@ -323,32 +339,31 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
           if (log.isDebugEnabled()) log.debug("Finalize after Command returned clusterstate={}", resp.clusterState);
           if (resp.clusterState != null) {
             DocCollection docColl = resp.clusterState.getCollectionOrNull(collection);
-            Map<String, DocCollection> collectionStates;
+            Map<String,DocCollection> collectionStates;
             if (docColl != null) {
               collectionStates = new HashMap<>();
               collectionStates.put(docColl.getName(), docColl);
             } else {
               collectionStates = new HashMap<>();
             }
-            ClusterState cs = new ClusterState(responce.clusterState.getLiveNodes(), collectionStates);
+            ClusterState cs = ClusterState.getRefCS(collectionStates, -2);
 
-            overseer.getZkStateWriter().enqueueUpdate(cs, null,false);
+            zkWriter.enqueueUpdate(cs, null, false);
             overseer.writePendingUpdates();
           }
         }
 
         if (collection != null && responce.clusterState != null) {
-          Integer version = overseer.getZkStateWriter().lastWrittenVersion(collection);
+          Integer version = zkWriter.lastWrittenVersion(collection);
           if (version != null && !action.equals(DELETE)) {
             results.add("csver", version);
           } else {
-             //deleted
+            //deleted
           }
         }
 
       } else {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown operation:"
-                + operation);
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown operation:" + operation);
       }
       if (results.get("success") == null) results.add("success", new NamedList<>());
 
@@ -359,24 +374,20 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
         results.add("exception", nl);
       }
 
-    }  catch (InterruptedException e) {
-      ParWork.propagateInterrupt(e);
-      throw e;
     } catch (Exception e) {
       String collName = message.getStr("collection");
       if (collName == null) collName = message.getStr(NAME);
 
       if (collName == null) {
         log.error("Operation " + operation + " failed", e);
-      } else  {
-        log.error("Collection: " + collName + " operation: " + operation
-                + " failed", e);
+      } else {
+        log.error("Collection: " + collName + " operation: " + operation + " failed", e);
       }
 
       results.add("Operation " + operation + " caused exception:", e);
       SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
       nl.add("msg", e.getMessage());
-      nl.add("rspCode", e instanceof SolrException ? ((SolrException)e).code() : -1);
+      nl.add("rspCode", e instanceof SolrException ? ((SolrException) e).code() : -1);
       results.add("exception", nl);
     }
 
@@ -686,7 +697,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
             for (Slice slice : slices) {
               for (Replica replica : slice.getReplicas()) {
                 if (coreUrl.equals(replica.getCoreUrl()) && ((requireActive ? replica.getState().equals(Replica.State.ACTIVE) : true)
-                        && zkStateReader.getClusterState().liveNodesContain(replica.getNodeName()))) {
+                        && zkStateReader.isNodeLive(replica.getNodeName()))) {
                   r.put(coreUrl, replica);
                   break;
                 }
@@ -734,7 +745,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   void validateConfigOrThrowSolrException(String configName) throws IOException, KeeperException, InterruptedException {
     boolean isValid = cloudManager.getDistribStateManager().hasData(ZkConfigManager.CONFIGS_ZKNODE + "/" + configName);
     if(!isValid) {
-      overseer.getZkStateReader().getZkClient().printLayout();
+      //overseer.getZkStateReader().getZkClient().printLayout();
       throw new SolrException(ErrorCode.BAD_REQUEST, "Can not find the specified config set: " + configName);
     }
   }
@@ -795,7 +806,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
       shardRequestTracker = new ShardRequestTracker(asyncId, message.getStr("operation"), adminPath, zkStateReader, shardHandlerFactory, overseer);
     }
     for (Slice slice : coll.getSlices()) {
-      notLivesReplicas.addAll(shardRequestTracker.sliceCmd(clusterState, params, stateMatcher, slice, shardHandler));
+      notLivesReplicas.addAll(shardRequestTracker.sliceCmd(params, stateMatcher, slice, shardHandler));
     }
     if (processResponses) {
       shardRequestTracker.processResponses(results, shardHandler, false, null, okayExceptions);
@@ -1046,6 +1057,10 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
 
   protected interface Cmd {
     AddReplicaCmd.Response call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception;
+
+    default boolean cleanup(ZkNodeProps message) {
+      return false;
+    }
   }
 
   protected interface Finalize {
@@ -1089,12 +1104,12 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
      * Send request to all replicas of a slice
     * @return List of replicas which are not live for receiving the request
      */
-    public List<Replica> sliceCmd(ClusterState clusterState, ModifiableSolrParams params, Replica.State stateMatcher,
+    public List<Replica> sliceCmd(ModifiableSolrParams params, Replica.State stateMatcher,
                                   Slice slice, ShardHandler shardHandler) {
       List<Replica> notLiveReplicas = new ArrayList<>();
       for (Replica replica : slice.getReplicas()) {
         if ((stateMatcher == null || Replica.State.getState(replica.getStr(ZkStateReader.STATE_PROP)) == stateMatcher)) {
-          if (clusterState.liveNodesContain(replica.getStr(ZkStateReader.NODE_NAME_PROP))) {
+          if (zkStateReader.isNodeLive(replica.getStr(ZkStateReader.NODE_NAME_PROP))) {
             // For thread safety, only simple clone the ModifiableSolrParams
             ModifiableSolrParams cloneParams = new ModifiableSolrParams();
             cloneParams.add(params);
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RenameCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RenameCmd.java
index bc7d886..9ed965b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RenameCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RenameCmd.java
@@ -73,6 +73,6 @@ public class RenameCmd implements OverseerCollectionMessageHandler.Cmd {
     }
 
     ocmh.zkStateReader.aliasesManager.applyModificationAndExportToZk(a -> a.cloneWithRename(extCollectionName, target));
-    return null;
+    return new AddReplicaCmd.Response();
   }
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java
index 965e8f0..8531923 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/ReplaceNodeCmd.java
@@ -68,10 +68,10 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
     int timeout = message.getInt("timeout", 10 * 60); // 10 minutes
     boolean parallel = message.getBool("parallel", false);
 
-    if (!clusterState.liveNodesContain(source)) {
+    if (!zkStateReader.isNodeLive(source)) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Source Node: " + source + " is not live");
     }
-    if (target != null && !clusterState.liveNodesContain(target)) {
+    if (target != null && !zkStateReader.isNodeLive(target)) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target Node: " + target + " is not live");
     }
     List<ZkNodeProps> sourceReplicas = getReplicasOfNode(source, clusterState);
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
index 89cd1b1..54c97b8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
@@ -119,7 +119,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
 
     // Get the Solr nodes to restore a collection.
     final List<String> nodeList = Assign.getLiveOrLiveAndCreateNodeSetList(
-        zkStateReader.getClusterState().getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM);
+        zkStateReader.getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM);
 
     int numShards = backupCollectionState.getActiveSlices().size();
 
@@ -311,7 +311,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
         params.set(NAME, "snapshot." + slice.getName());
         params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
         params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
-        shardRequestTracker.sliceCmd(clusterState, params, null, slice, shardHandler);
+        shardRequestTracker.sliceCmd(params, null, slice, shardHandler);
       }
       shardRequestTracker.processResponses(new NamedList(), shardHandler, true, "Could not restore core");
     }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index ae8f773..944de1d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -54,12 +54,10 @@ import org.apache.solr.util.TestInjection;
 import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import static org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.Variable.CORE_IDX;
 import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
 import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
-import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
 import static org.apache.solr.common.params.CollectionAdminParams.FOLLOW_ALIASES;
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
@@ -77,6 +75,7 @@ import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -135,6 +134,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
     String splitKey = message.getStr("split.key");
 
     Slice parentSlice = getParentSlice(clusterState, collectionName, slice, splitKey);
+    // nocommit
 //    if (parentSlice.getState() != Slice.State.ACTIVE) {
 //      throw new SolrException(SolrException.ErrorCode.INVALID_STATE, "Parent slice is not active: " +
 //          collectionName + "/ " + parentSlice.getName() + ", state=" + parentSlice.getState());
@@ -150,7 +150,9 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
     }
 
     RTimerTree t = timings.sub("checkDiskSpace");
-    checkDiskSpace(collectionName, slice.get(), parentShardLeader, splitMethod, ocmh.cloudManager);
+    if (Boolean.getBoolean("solr.enableMetrics")) {
+      checkDiskSpace(collectionName, slice.get(), parentShardLeader, splitMethod, ocmh.cloudManager);
+    }
     t.stop();
 
     // let's record the ephemeralOwner of the parent leader node
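
The disk-space pre-flight above is now gated behind a JVM system property. A small runnable sketch of the same gating pattern; "example.checkDiskSpace" is a made-up property name for illustration.

    class SplitPreFlight {
        // Boolean.getBoolean returns true only if the named system property
        // exists and equals "true" (case-insensitive).
        static void maybeCheckDiskSpace(Runnable check) {
            if (Boolean.getBoolean("example.checkDiskSpace")) { // hypothetical property
                check.run();
            }
        }

        public static void main(String[] args) {
            maybeCheckDiskSpace(() -> System.out.println("checking disk space..."));
        }
    }
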
@@ -282,7 +284,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
 
       t = timings.sub("createSubSlicesAndLeadersInState");
       List<OverseerCollectionMessageHandler.Finalize> firstReplicaFutures = new ArrayList<>();
-      Set<Runnable> firstReplicaRunAfters = ConcurrentHashMap.newKeySet();
+
       for (int i = 0; i < subRanges.size(); i++) {
         String subSlice = subSlices.get(i);
         String subShardName = subShardNames.get(i);
@@ -302,8 +304,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
 
         //ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
         clusterState = new CollectionMutator(ocmh.cloudManager).createShard(clusterState, new ZkNodeProps(propMap));
-        // wait until we are able to see the new shard in cluster state and refresh the local view of the cluster state
-        //ocmh.waitForNewShard(collectionName, subSlice);
+
 
         log.debug("Adding first replica {} as part of slice {} of collection {} on {}"
             , subShardName, subSlice, collectionName, nodeName);
@@ -343,18 +344,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
 //        });
 //        firstReplicaFutures.add(future);
       }
-      DocCollection docColl = clusterState.getCollectionOrNull(message.getStr(COLLECTION));
-      Map<String, DocCollection> collectionStates;
-      if (docColl != null) {
-        collectionStates = new HashMap<>();
-        collectionStates.put(docColl.getName(), docColl);
-      } else {
-        collectionStates = new HashMap<>();
-      }
-      ClusterState cs = new ClusterState(clusterState.getLiveNodes(), collectionStates);
 
-      ocmh.overseer.getZkStateWriter().enqueueUpdate(cs, null,false);
+      ocmh.overseer.getZkStateWriter().enqueueUpdate(clusterState, null,false);
       ocmh.overseer.writePendingUpdates();
+
+      log.info("Clusterstate after adding new shard for split {}", clusterState);
+
       firstReplicaFutures.forEach(future -> {
         try {
           future.call();
@@ -382,6 +377,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
           CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
           cmd.setCoreName(subShardName);
           cmd.setNodeName(nodeName);
+          cmd.setShardId(subShardName);
           cmd.setState(Replica.State.ACTIVE);
           cmd.setCheckLive(true);
           cmd.setOnlyIfLeader(true);
@@ -427,40 +423,39 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       }
       t.stop();
 
-      if (log.isDebugEnabled()) {
-        log.debug("Index on shard: {} split into {} successfully", nodeName, subShardNames.size());
+
+      log.info("Index on shard: {} split into {} successfully", nodeName, subShardNames.size());
+
+
+      t = timings.sub("applyBufferedUpdates");
+      // apply buffered updates on sub-shards
+      {
+        final ShardRequestTracker shardRequestTracker = ocmh.asyncRequestTracker(asyncId, message.getStr("operation"));
+
+        for (int i = 0; i < subShardNames.size(); i++) {
+          String subShardName = subShardNames.get(i);
+
+          log.debug("Applying buffered updates on : {}", subShardName);
+
+          params = new ModifiableSolrParams();
+          params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
+          params.set(CoreAdminParams.NAME, subShardName);
+
+          shardRequestTracker.sendShardRequest(nodeName, params, shardHandler);
+        }
+
+        String msgOnError = "SPLITSHARD failed while asking sub shard leaders to apply buffered updates";
+        shardRequestTracker.processResponses(results, shardHandler, true, msgOnError, Collections.singleton("org.apache.solr.common.SolrException"));
+        handleFailureOnAsyncRequest(results, msgOnError);
       }
+      t.stop();
 
-      // nocommit - where do we enter buffering state??
-//      t = timings.sub("applyBufferedUpdates");
-//      // apply buffered updates on sub-shards
-//      {
-//        final ShardRequestTracker shardRequestTracker = ocmh.asyncRequestTracker(asyncId, message.getStr("operation"));
-//
-//        for (int i = 0; i < subShardNames.size(); i++) {
-//          String subShardName = subShardNames.get(i);
-//
-//          log.debug("Applying buffered updates on : {}", subShardName);
-//
-//          params = new ModifiableSolrParams();
-//          params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
-//          params.set(CoreAdminParams.NAME, subShardName);
-//
-//          shardRequestTracker.sendShardRequest(nodeName, params, shardHandler);
-//        }
-//
-//        String msgOnError = "SPLITSHARD failed while asking sub shard leaders to apply buffered updates";
-//        shardRequestTracker.processResponses(results, shardHandler, true, msgOnError);
-//        handleFailureOnAsyncRequest(results, msgOnError);
-//      }
-//      t.stop();
-//
-//      log.debug("Successfully applied buffered updates on : {}", subShardNames);
+      log.debug("Successfully applied buffered updates on : {}", subShardNames);
 
       // Replica creation for the new Slices
       // replica placement is controlled by the autoscaling policy framework
 
-      Set<String> nodes = clusterState.getLiveNodes();
+      Set<String> nodes = ocmh.zkStateReader.getLiveNodes();
       List<String> nodeList = new ArrayList<>(nodes.size());
       nodeList.addAll(nodes);
 
@@ -486,7 +481,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
           .assignNrtReplicas(numNrt.get())
           .assignTlogReplicas(numTlog.get())
           .assignPullReplicas(numPull.get())
-          .onNodes(new ArrayList<>(clusterState.getLiveNodes()))
+          .onNodes(nodeList)
           .build();
       Assign.AssignStrategyFactory assignStrategyFactory = new Assign.AssignStrategyFactory(ocmh.cloudManager);
       Assign.AssignStrategy assignStrategy = assignStrategyFactory.create();
@@ -506,16 +501,19 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         // avoid a race condition where Overseer may prematurely activate the new sub-slices (and deactivate
         // the parent slice) before all new replicas are added. This situation may lead to a loss of performance
         // because the new shards will be activated with possibly many fewer replicas.
-//        ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(),
-//            ZkStateReader.COLLECTION_PROP, collectionName,
-//            ZkStateReader.SHARD_ID_PROP, sliceName,
-//            ZkStateReader.CORE_NAME_PROP, solrCoreName,
-//            ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(),
-//            ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(),
-//            ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName),
-//            ZkStateReader.NODE_NAME_PROP, subShardNodeName,
-//            CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
-//        ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
+        ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(),
+            ZkStateReader.COLLECTION_PROP, collectionName,
+            ZkStateReader.SHARD_ID_PROP, sliceName,
+            ZkStateReader.CORE_NAME_PROP, solrCoreName,
+            ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(),
+            ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(),
+            ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName),
+            ZkStateReader.NODE_NAME_PROP, subShardNodeName,
+            CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
+
+        AddReplicaCmd.Response resp = new AddReplicaCmd(ocmh, true).call(clusterState, props, results);
+        clusterState = resp.clusterState;
+
 
         HashMap<String, Object> propMap = new HashMap<>();
         propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
@@ -523,7 +521,6 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         propMap.put(SHARD_ID_PROP, sliceName);
         propMap.put(REPLICA_TYPE, replicaPosition.type.name());
         propMap.put(ZkStateReader.NODE_NAME_PROP, subShardNodeName);
-        //propMap.put("replica", solrCoreName);
         propMap.put(CoreAdminParams.NAME, solrCoreName);
         // copy over property params:
         for (String key : message.keySet()) {
@@ -609,28 +606,28 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       t = timings.sub("createCoresForReplicas");
 
       List<Future> replicaFutures = new ArrayList<>();
-      Set<Runnable> replicaRunAfters = ConcurrentHashMap.newKeySet();
-//      for (Map<String, Object> replica : replicas) {
-//        ocmh.addReplica(clusterState, new ZkNodeProps(replica), results);
-//      }
+      Set<OverseerCollectionMessageHandler.Finalize> replicaRunAfters = ConcurrentHashMap.newKeySet();
+      for (Map<String, Object> replica : replicas) {
+        ocmh.addReplica(clusterState, new ZkNodeProps(replica), results);
+      }
 
       // now actually create replica cores on sub shard nodes
-//      for (Map<String, Object> replica : replicas) {
-//        ClusterState finalClusterState = clusterState;
-//        Future<?> future = ocmh.overseer.getTaskExecutor().submit(() -> {
-//          AddReplicaCmd.Response response = null;
-//          try {
-//            response = ocmh.addReplicaWithResp(finalClusterState, new ZkNodeProps(replica), results, null);
-//          } catch (Exception e) {
-//            log.error("", e);
-//          }
-//          if (response != null && response.asyncFinalRunner != null) {
-//            replicaRunAfters.add(response.asyncFinalRunner);
-//          }
-//        });
-//
-//        replicaFutures.add(future);
-//      }
+      for (Map<String, Object> replica : replicas) {
+        ClusterState finalClusterState = clusterState;
+        Future<?> future = ocmh.overseer.getTaskExecutor().submit(() -> {
+          AddReplicaCmd.Response response = null;
+          try {
+            response = new AddReplicaCmd(ocmh).call(finalClusterState, new ZkNodeProps(replica), results);
+          } catch (Exception e) {
+            log.error("", e);
+          }
+          if (response != null && response.asyncFinalRunner != null) {
+            replicaRunAfters.add(response.asyncFinalRunner);
+          }
+        });
+
+        replicaFutures.add(future);
+      }
 
       assert TestInjection.injectSplitFailureAfterReplicaCreation();
 
@@ -642,15 +639,15 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       }
       t.stop();
 
-//      replicaFutures.forEach(future -> {
-//        try {
-//          future.get();
-//        } catch (InterruptedException e) {
-//          log.error("", e);
-//        } catch (ExecutionException e) {
-//          log.error("", e);
-//        }
-//      });
+      replicaFutures.forEach(future -> {
+        try {
+          future.get();
+        } catch (InterruptedException e) {
+          log.error("", e);
+        } catch (ExecutionException e) {
+          log.error("", e);
+        }
+      });
 
       log.info("Successfully created all replica shards for all sub-slices {}", subSlices);
 
@@ -667,19 +664,22 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       }
       success = true;
 
-      collection = clusterState.getCollection(collectionName);
-      for (Map<String,Object> replica : replicas) {
-        clusterState = checkAndCompleteShardSplit(clusterState, collection, replica.get("name").toString(), replica.get("shard").toString(),
-            new Replica(replica.get("name").toString(), replica, replica.get("collection").toString(), replica.get("shard").toString(), ocmh.zkStateReader));
-      }
-
 
       AddReplicaCmd.Response response = new AddReplicaCmd.Response();
 
+      ClusterState finalClusterState = clusterState;
       response.asyncFinalRunner = new OverseerCollectionMessageHandler.Finalize() {
         @Override
         public AddReplicaCmd.Response call() {
+          DocCollection coll = ocmh.overseer.getZkStateReader().getClusterState().getCollection(collectionName);
+          ClusterState completeCs = finalClusterState;
+          for (Map<String,Object> replica : replicas) {
+             completeCs = checkAndCompleteShardSplit(completeCs, coll, replica.get("name").toString(), replica.get("shard").toString(),
+                new Replica(replica.get("name").toString(), replica, replica.get("collection").toString(), replica.get("shard").toString(), ocmh.zkStateReader));
+          }
+
           AddReplicaCmd.Response response = new AddReplicaCmd.Response();
+          response.clusterState = completeCs;
           return response;
 
         }
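
The tail of SplitShardCmd above packages the completion work into an asyncFinalRunner instead of running checkAndCompleteShardSplit inline. A sketch, with stand-in types, of that deferred-finalization shape:

    import java.util.concurrent.Callable;

    class SplitLikeCmd {
        static class Response {
            Object clusterState;
            Callable<Response> asyncFinalRunner;
        }

        Response call(Object stateSoFar) {
            Response response = new Response();
            // defer the completion bookkeeping until after the caller has
            // published the state computed so far
            response.asyncFinalRunner = () -> {
                Response done = new Response();
                done.clusterState = stateSoFar; // would be checkAndComplete(...) in the real code
                return done;
            };
            return response;
        }
    }
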
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java
index d01753d..f2dd8b6 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java
@@ -110,7 +110,7 @@ public class ClusterStateMutator {
     }
 
     DocCollection newCollection = new DocCollection(cName,
-            slices, collectionProps, router, 0);
+            slices, collectionProps, router, 0, false);
 
     return clusterState.copyWith(cName, newCollection);
   }
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
index fe88ae6..e00e455 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
@@ -84,19 +84,15 @@ public class CollectionMutator {
 
       // TODO - fix, no makePath (ensure every path part exists), async, single node
       try {
-        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName
-            + "/leader_elect/" + shardId, null, CreateMode.PERSISTENT, false);
-        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName
-            + "/leader_elect/" + shardId + LeaderElector.ELECTION_NODE, null, CreateMode.PERSISTENT, false);
-        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE+ "/" + collectionName + "/" + shardId
-            + ZkStateReader.SHARD_LEADERS_ZKNODE, null, CreateMode.PERSISTENT, false);
 
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName + "/" +  shardId, null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName + "/leader_elect/" +  shardId, null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName + "/leader_elect/" +  shardId + "/election", null, CreateMode.PERSISTENT, false);
         stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName + "/leaders/" +  shardId, null, CreateMode.PERSISTENT, false);
-        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName + "/terms/" +  shardId, ZkStateReader.emptyJson, CreateMode.PERSISTENT, false);
-        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName + "/schema_lock", null, CreateMode.PERSISTENT, false);
+
+        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE  + "/" + collectionName + "/terms", null, CreateMode.PERSISTENT, false);
+        stateManager.makePath(ZkStateReader.COLLECTIONS_ZKNODE  + "/" + collectionName + "/terms/" + shardId, ZkStateReader.emptyJson, CreateMode.PERSISTENT, false);
+
       } catch (AlreadyExistsException e) {
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
       } catch (IOException e) {
@@ -164,7 +160,7 @@ public class CollectionMutator {
     }
 
     return clusterState.copyWith(coll.getName(),
-        new DocCollection(coll.getName(), coll.getSlicesMap(), m, coll.getRouter(), coll.getZNodeVersion()));
+        new DocCollection(coll.getName(), coll.getSlicesMap(), m, coll.getRouter(), coll.getZNodeVersion(), false));
   }
 
   public static DocCollection updateSlice(String collectionName, DocCollection collection, Slice slice) {
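
CollectionMutator's hunk above bootstraps the per-collection terms znode before creating the per-shard child. A hedged sketch against the plain ZooKeeper client; note the production code surfaces AlreadyExistsException as a server error, while this sketch tolerates the equivalent NodeExistsException for idempotence, and the paths are illustrative.

    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.KeeperException;
    import org.apache.zookeeper.ZooDefs;
    import org.apache.zookeeper.ZooKeeper;

    class TermsPathBootstrap {
        static void ensurePath(ZooKeeper zk, String path, byte[] data)
                throws KeeperException, InterruptedException {
            try {
                zk.create(path, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            } catch (KeeperException.NodeExistsException e) {
                // already created by another node; treat as success here
            }
        }

        static void bootstrapTerms(ZooKeeper zk, String collection, String shard)
                throws KeeperException, InterruptedException {
            String base = "/collections/" + collection + "/terms";
            ensurePath(zk, base, null);                       // parent first
            ensurePath(zk, base + "/" + shard, "{}".getBytes()); // then the shard child
        }
    }
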
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
index 54af3a7..af43a83 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
@@ -91,22 +91,18 @@ public class ZkStateWriter {
     this.reader = zkStateReader;
     this.stats = stats;
 
-    zkStateReader.forciblyRefreshAllClusterStateSlow();
-
-    zkStateReader.getZkClient().printLayout();
-
     cs = zkStateReader.getClusterState();
 
     cs.forEachCollection(collection -> {
-      String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(collection.getName());
-      if (log.isDebugEnabled()) log.debug("clear state updates on new overseer for collection {}", collection.getName());
-      try {
-        reader.getZkClient().setData(stateUpdatesPath, Utils.toJSON(new ZkNodeProps()), -1, true);
-      } catch (KeeperException e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-      } catch (InterruptedException e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-      }
+//      String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(collection.getName());
+//      if (log.isDebugEnabled()) log.debug("clear state updates on new overseer for collection {}", collection.getName());
+//      try {
+//        reader.getZkClient().setData(stateUpdatesPath, Utils.toJSON(new ZkNodeProps()), -1, true);
+//      } catch (KeeperException e) {
+//        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+//      } catch (InterruptedException e) {
+//        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+//      }
     });
 
     if (log.isDebugEnabled()) log.debug("zkStateWriter starting with cs {}", cs);
@@ -126,6 +122,23 @@ public class ZkStateWriter {
         }
 
         clusterState.forEachCollection(collection -> {
+          if (trackVersions.get(collection.getName()) == null) {
+            reader.forciblyRefreshClusterStateSlow(collection.getName());
+            DocCollection latestColl = reader.getClusterState().getCollectionOrNull(collection.getName());
+
+            if (latestColl == null) {
+              //log.info("no node exists, using version 0");
+              trackVersions.remove(collection.getName());
+            } else {
+              cs.getCollectionStates().put(latestColl.getName(), new ClusterState.CollectionRef(latestColl));
+              //log.info("got version from zk {}", existsStat.getVersion());
+              int version = latestColl.getZNodeVersion();
+              log.info("Updating local tracked version to {} for {}", version, collection.getName());
+              trackVersions.put(collection.getName(), version);
+            }
+          }
+
+
           DocCollection currentCollection = cs.getCollectionOrNull(collection.getName());
           collection.getProperties().remove("pullReplicas");
           collection.getProperties().remove("replicationFactor");
@@ -167,84 +180,123 @@ public class ZkStateWriter {
         if (overseerAction == null) {
           throw new RuntimeException("unknown operation:" + operation + " contents:" + message.getProperties());
         }
+
         switch (overseerAction) {
           case STATE:
             // log.info("state cmd {}", message);
             message.getProperties().remove("operation");
 
             for (Map.Entry<String,Object> entry : message.getProperties().entrySet()) {
-              if (entry.getKey().equals("DOWNNODE")) {
+              if (entry.getKey().equalsIgnoreCase("downnode")) {
+                log.info("set downnode for {}", entry.getValue());
                 cs.forEachCollection(docColl -> {
+
+                  if (trackVersions.get(docColl.getName()) == null) {
+                    reader.forciblyRefreshClusterStateSlow(docColl.getName());
+                    DocCollection latestColl = reader.getClusterState().getCollectionOrNull(docColl.getName());
+
+                    if (latestColl == null) {
+                      //log.info("no node exists, using version 0");
+                      trackVersions.remove(docColl.getName());
+                    } else {
+                      cs.getCollectionStates().put(latestColl.getName(), new ClusterState.CollectionRef(latestColl));
+                      //log.info("got version from zk {}", existsStat.getVersion());
+                      int version = latestColl.getZNodeVersion();
+                      log.info("Updating local tracked version to {} for {}", version, docColl.getName());
+                      trackVersions.put(docColl.getName(), version);
+                    }
+                  }
+
                   ZkNodeProps updates = stateUpdates.get(docColl.getName());
                   if (updates == null) {
                     updates = new ZkNodeProps();
                     stateUpdates.put(docColl.getName(), updates);
                   }
                   Integer ver = trackVersions.get(docColl.getName());
-                  if (ver == null)  {
-                    ver = docColl.getZNodeVersion();
+                  if (ver == null) {
+                    //   ver = docColl.getZNodeVersion();
                     if (ver == null) {
                       ver = 0;
+                    } else {
+
                     }
-                  } else {
-                    ver = ver + 1;
                   }
                   updates.getProperties().put("_cs_ver_", ver.toString());
                   List<Replica> replicas = docColl.getReplicas();
                   for (Replica replica : replicas) {
-                    if (replica.getState() != Replica.State.DOWN) {
+                    if (replica.getState() != Replica.State.DOWN && replica.getNodeName().equals(entry.getValue())) {
+                      log.info("set downnode for replica {}", replica);
                       replica.setState(Replica.State.DOWN);
                       updates.getProperties().put(replica.getName(), Replica.State.getShortState(Replica.State.DOWN));
                       dirtyState.add(docColl.getName());
                     }
                   }
                 });
-                continue;
               }
+            }
+            for (Map.Entry<String,Object> entry : message.getProperties().entrySet()) {
+              if (!entry.getKey().equalsIgnoreCase("downnode")) {
+                String core = entry.getKey();
+                String collectionAndStateString = (String) entry.getValue();
+                log.info("collectionAndState={}", collectionAndStateString);
+                String[] collectionAndState = collectionAndStateString.split(",");
+                String collection = collectionAndState[0];
+                String setState = collectionAndState[1];
+
+                if (trackVersions.get(collection) == null) {
+                  reader.forciblyRefreshClusterStateSlow(collection);
+                  DocCollection latestColl = reader.getClusterState().getCollectionOrNull(collection);
+
+                  if (latestColl == null) {
+                    //log.info("no node exists, using version 0");
+                    trackVersions.remove(collection);
+                  } else {
+                    cs.getCollectionStates().put(latestColl.getName(), new ClusterState.CollectionRef(latestColl));
+                    //log.info("got version from zk {}", existsStat.getVersion());
+                    int version = latestColl.getZNodeVersion();
+                    log.info("Updating local tracked version to {} for {}", version, collection);
+                    trackVersions.put(collection, version);
+                  }
+                }
 
-              String core = entry.getKey();
-              String collectionAndStateString = (String) entry.getValue();
-              String[] collectionAndState = collectionAndStateString.split(",");
-              String collection = collectionAndState[0];
-              String setState = collectionAndState[1];
-              ZkNodeProps updates = stateUpdates.get(collection);
-              if (updates == null) {
-                updates = new ZkNodeProps();
-                stateUpdates.put(collection, updates);
-              }
-              Integer ver = trackVersions.get(collection);
-              if (ver == null)  {
-                ver = 0;
-              } else {
-                ver = ver + 1;
-              }
-              updates.getProperties().put("_cs_ver_", ver.toString());
-
-              DocCollection docColl = cs.getCollectionOrNull(collection);
-              if (docColl != null) {
-                Replica replica = docColl.getReplica(core);
-                if (replica != null) {
-                  if (setState.equals("leader")) {
-                    if (log.isDebugEnabled()) log.debug("set leader {} {}", message.getStr(ZkStateReader.CORE_NAME_PROP), replica);
-                    Slice slice = docColl.getSlice(replica.getSlice());
-                    slice.setLeader(replica);
-                    replica.setState(Replica.State.ACTIVE);
-                    replica.getProperties().put("leader", "true");
-                    Collection<Replica> replicas = slice.getReplicas();
-                    for (Replica r : replicas) {
-                      if (r != replica) {
-                        r.getProperties().remove("leader");
+                ZkNodeProps updates = stateUpdates.get(collection);
+                if (updates == null) {
+                  updates = new ZkNodeProps();
+                  stateUpdates.put(collection, updates);
+                }
+                Integer ver = trackVersions.get(collection);
+                if (ver == null) {
+                  ver = 0;
+                } else {
+                }
+                updates.getProperties().put("_cs_ver_", ver.toString());
+
+                DocCollection docColl = cs.getCollectionOrNull(collection);
+                if (docColl != null) {
+                  Replica replica = docColl.getReplica(core);
+                  if (replica != null) {
+                    if (setState.equals("leader")) {
+                      if (log.isDebugEnabled()) log.debug("set leader {}", replica);
+                      Slice slice = docColl.getSlice(replica.getSlice());
+                      slice.setLeader(replica);
+                      replica.setState(Replica.State.ACTIVE);
+                      replica.getProperties().put("leader", "true");
+                      Collection<Replica> replicas = slice.getReplicas();
+                      for (Replica r : replicas) {
+                        if (r != replica) {
+                          r.getProperties().remove("leader");
+                        }
                       }
+                      updates.getProperties().put(replica.getName(), "l");
+                      dirtyState.add(collection);
+                    } else {
+
+                      Replica.State state = Replica.State.getState(setState);
+                      updates.getProperties().put(replica.getName(), Replica.State.getShortState(state));
+                      // log.info("set state {} {}", state, replica);
+                      replica.setState(state);
+                      dirtyState.add(collection);
                     }
-                    updates.getProperties().put(replica.getName(), "l");
-                    dirtyState.add(collection);
-                  } else {
-
-                    Replica.State state = Replica.State.getState(setState);
-                    updates.getProperties().put(replica.getName(), Replica.State.getShortState(state));
-                    // log.info("set state {} {}", state, replica);
-                    replica.setState(state);
-                    dirtyState.add(collection);
                   }
                 }
               }
@@ -262,10 +314,10 @@ public class ZkStateWriter {
 
             DocCollection docColl = cs.getCollectionOrNull(collection);
             if (docColl != null) {
-              for (Map.Entry<String,Object> entry : message.getProperties().entrySet()) {
-                Slice slice = docColl.getSlice(entry.getKey());
+              for (Map.Entry<String,Object> e : message.getProperties().entrySet()) {
+                Slice slice = docColl.getSlice(e.getKey());
                 if (slice != null) {
-                  Slice.State state = Slice.State.getState((String) entry.getValue());
+                  Slice.State state = Slice.State.getState((String) e.getValue());
                   slice.setState(state);
                   dirtyStructure.add(collection);
                 }
@@ -306,8 +358,8 @@ public class ZkStateWriter {
         throttle.minimumWaitBetweenActions();
         throttle.markAttemptingAction();
 
-        if (log.isDebugEnabled()) {
-          log.debug("writePendingUpdates {}", cs);
+        if (log.isTraceEnabled()) {
+          log.trace("writePendingUpdates {}", cs);
         }
 
         if (failedUpdates.size() > 0) {
@@ -335,87 +387,71 @@ public class ZkStateWriter {
             String pathSCN = ZkStateReader.getCollectionSCNPath(collection.getName());
            // log.info("process collection {} path {}", collection.getName(), path);
             Stat existsStat = null;
-            if (log.isDebugEnabled()) log.debug("process {}", collection);
+            if (log.isTraceEnabled()) log.trace("process {}", collection);
             try {
              // log.info("get data for {}", name);
               byte[] data = Utils.toJSON(singletonMap(name, collection));
             //  log.info("got data for {} {}", name, data.length);
 
               try {
-              
+                Integer version = null;
                 Integer v = trackVersions.get(collection.getName());
-                Integer version;
+
                 if (v != null) {
                   //log.info("got version from cache {}", v);
                   version = v;
-                  lastVersion.set(version);
-                  if (log.isDebugEnabled()) log.debug("Write state.json prevVersion={} bytes={} col={}", version, data.length, collection);
-
-                  reader.getZkClient().setData(path, data, version, true);
-                  if (dirtyStructure.contains(collection.getName())) {
-                    dirtyStructure.remove(collection.getName());
-                    reader.getZkClient().setData(pathSCN, null, -1, true);
-                    ZkNodeProps updates = stateUpdates.get(collection.getName());
-                    if (updates != null) {
-                      updates.getProperties().clear();
-                    }
-                  }
-
                 } else {
-                  existsStat = reader.getZkClient().exists(path, null);
-                  if (existsStat == null) {
-                    //log.info("no node exists, using version 0");
-                    version = 0;
-                    lastVersion.set(-1);
-                    log.error("No state.json found for collection {}", collection);
-                  } else {
-
-                    //log.info("got version from zk {}", existsStat.getVersion());
-                    version = existsStat.getVersion();
-                    lastVersion.set(version);
-                    if (log.isDebugEnabled()) log.debug("Write state.json prevVersion={} bytes={} col={}", version, data.length, collection);
-
-                    reader.getZkClient().setData(path, data, version, true);
-                    if (dirtyStructure.contains(collection.getName())) {
-                      dirtyStructure.remove(collection.getName());
-                      reader.getZkClient().setData(pathSCN, null, -1, true);
-                      ZkNodeProps updates = stateUpdates.get(collection.getName());
-                      if (updates != null) {
-                        updates.getProperties().clear();
-                      }
-                    }
+                  version = 0;
+                }
+                lastVersion.set(version);
+                if (log.isDebugEnabled()) log.debug("Write state.json prevVersion={} bytes={} col={}", version, data.length, collection);
 
+                reader.getZkClient().setData(path, data, version, true);
+                trackVersions.put(collection.getName(), version + 1);
+                if (dirtyStructure.contains(collection.getName())) {
+                  log.info("structure change in {}", collection.getName());
+                  dirtyStructure.remove(collection.getName());
+                  reader.getZkClient().setData(pathSCN, null, -1, true);
+
+                  ZkNodeProps updates = stateUpdates.get(collection.getName());
+                  if (updates != null) {
+                    updates.getProperties().clear();
                   }
                 }
-                trackVersions.put(collection.getName(), version + 1);
+
               } catch (KeeperException.NoNodeException e) {
                 if (log.isDebugEnabled()) log.debug("No node found for state.json", e);
-                trackVersions.remove(collection.getName());
+
+                lastVersion.set(-1);
+              //  trackVersions.remove(collection.getName());
                 // likely deleted
+                return;
+
               } catch (KeeperException.BadVersionException bve) {
                 //lastFailedException.set(bve);
                 //failedUpdates.put(collection.getName(), collection);
-                existsStat = reader.getZkClient().exists(path, null);
-                trackVersions.put(collection.getName(), existsStat.getVersion());
-                // this is a tragic error, we must disallow usage of this instance
-                log.warn("Tried to update the cluster state using version={} but we where rejected, found {}", lastVersion.get(), existsStat.getVersion(), bve);
+               // Stat estate = reader.getZkClient().exists(path, null);
+                trackVersions.remove(collection.getName());
+                throw bve;
+
               }
 
-              ZkNodeProps updates = stateUpdates.get(collection.getName());
-              if (updates != null) {
-                String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(collection.getName());
-                log.info("write state updates for collection {} {}", collection.getName(), updates);
-                dirtyState.remove(collection.getName());
-                reader.getZkClient().setData(stateUpdatesPath, Utils.toJSON(updates), -1, true);
-                updates.getProperties().clear();
+              if (dirtyState.contains(collection.getName())) {
+                ZkNodeProps updates = stateUpdates.get(collection.getName());
+                if (updates != null) {
+                  String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(collection.getName());
+                  log.info("write state updates for collection {} {}", collection.getName(), updates);
+                  dirtyState.remove(collection.getName());
+                  reader.getZkClient().setData(stateUpdatesPath, Utils.toJSON(updates), -1, true);
+                }
               }
 
             } catch (InterruptedException | AlreadyClosedException e) {
               log.info("We have been closed or one of our resources has, bailing {}", e.getClass().getSimpleName() + ":" + e.getMessage());
-              throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+
             } catch (KeeperException.SessionExpiredException e) {
               log.error("", e);
-              throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+
             } catch (Exception e) {
               log.error("Failed processing update=" + collection, e);
             }
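
The rewritten write path above boils down to optimistic concurrency on state.json: write with the cached znode version, bump the cache on success, and on BadVersionException discard the cached entry and rethrow so the next pass re-reads it. A minimal sketch of that shape with the plain ZooKeeper client:

    import java.util.Map;
    import org.apache.zookeeper.KeeperException;
    import org.apache.zookeeper.ZooKeeper;

    class OptimisticStateWriter {
        static void write(ZooKeeper zk, Map<String, Integer> trackVersions,
                          String collection, String path, byte[] data)
                throws KeeperException, InterruptedException {
            int version = trackVersions.getOrDefault(collection, 0);
            try {
                zk.setData(path, data, version); // rejected if our version is stale
                trackVersions.put(collection, version + 1);
            } catch (KeeperException.BadVersionException bve) {
                trackVersions.remove(collection); // force a refresh next time
                throw bve;
            }
        }
    }
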
@@ -482,7 +518,7 @@ public class ZkStateWriter {
   public ClusterState getClusterstate(boolean stateUpdate) {
     ourLock.lock();
     try {
-      return new ClusterState(reader.getLiveNodes(), cs.getCollectionsMap());
+      return ClusterState.getRefCS(cs.getCollectionsMap(), -2);
     } finally {
       ourLock.unlock();
     }
@@ -493,7 +529,13 @@ public class ZkStateWriter {
     try {
       stateUpdates.remove(collection);
       cs.getCollectionStates().remove(collection);
-      //reader.getZkClient().delete(ZkStateReader.getCollectionSCNPath(collection), -1);
+      trackVersions.remove(collection);
+      reader.getZkClient().delete(ZkStateReader.getCollectionSCNPath(collection), -1);
+      reader.getZkClient().delete(ZkStateReader.getCollectionStateUpdatesPath(collection), -1);
+    } catch (InterruptedException e) {
+      log.error("", e);
+    } catch (KeeperException e) {
+      log.error("", e);
     } finally {
       ourLock.unlock();
     }
diff --git a/solr/core/src/java/org/apache/solr/core/BlobRepository.java b/solr/core/src/java/org/apache/solr/core/BlobRepository.java
index 922a9d9..2e9d2c9 100644
--- a/solr/core/src/java/org/apache/solr/core/BlobRepository.java
+++ b/solr/core/src/java/org/apache/solr/core/BlobRepository.java
@@ -241,7 +241,7 @@ public class BlobRepository {
       Collections.shuffle(replicas, RANDOM);
       for (Replica r : replicas) {
         if (r.getState() == Replica.State.ACTIVE) {
-          if (zkStateReader.getClusterState().getLiveNodes().contains(r.get(ZkStateReader.NODE_NAME_PROP))) {
+          if (zkStateReader.getLiveNodes().contains(r.get(ZkStateReader.NODE_NAME_PROP))) {
             replica = r;
             break;
           }
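
BlobRepository keeps its shuffle-and-scan selection but now checks liveness through the reader's live-node set. A simplified, self-contained sketch of the selection loop; the types are stand-ins.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Random;
    import java.util.Set;

    class ReplicaPicker {
        enum State { ACTIVE, DOWN }

        static class Replica {
            final String nodeName;
            final State state;
            Replica(String nodeName, State state) { this.nodeName = nodeName; this.state = state; }
        }

        // Shuffle, then take the first replica that is ACTIVE on a live node.
        static Replica pick(List<Replica> replicas, Set<String> liveNodes, Random rnd) {
            List<Replica> shuffled = new ArrayList<>(replicas);
            Collections.shuffle(shuffled, rnd);
            for (Replica r : shuffled) {
                if (r.state == State.ACTIVE && liveNodes.contains(r.nodeName)) {
                    return r;
                }
            }
            return null; // no usable replica
        }
    }
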
diff --git a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
index 9d95ef7..41f120e 100644
--- a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@@ -524,6 +524,8 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
 
       CacheValue cacheValue = byDirectoryCache.get(directory);
       if (cacheValue == null) {
+        org.apache.solr.common.util.IOUtils.closeQuietly(directory);
+        assert ObjectReleaseTracker.release(directory);
         throw new IllegalArgumentException("Unknown directory: " + directory
                 + " " + byDirectoryCache);
       }
@@ -538,7 +540,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
       //    }
       cacheValue.refCnt--;
 
-      if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) {
+      if (cacheValue.refCnt == 0 && cacheValue.doneWithDir ||  closed) {
         boolean cl = closeCacheValue(cacheValue);
         if (cl) {
           removeFromCache(cacheValue);
diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
index 77e3e6b..46ac7da 100644
--- a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
+++ b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
@@ -28,8 +28,8 @@ import com.github.benmanes.caffeine.cache.Caffeine;
 import org.apache.solr.cloud.CloudConfigSetService;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.ZkSolrResourceLoader;
-import org.apache.solr.common.ParWork;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.IOUtils;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IndexSchemaFactory;
@@ -90,11 +90,13 @@ public abstract class ConfigSetService {
               && !flags.getBooleanArg("trusted")
               ) ? false: true;
 
-      SolrConfig solrConfig = createSolrConfig(dcore, coreLoader, true);
+      log.info("Trusted configset={} {}", trusted, flags);
+
+      SolrConfig solrConfig = createSolrConfig(dcore, coreLoader, trusted);
       IndexSchema schema = createIndexSchema(dcore, solrConfig);
-      return new ConfigSet(configSetName(dcore), solrConfig, schema, properties, true);
+      return new ConfigSet(configSetName(dcore), solrConfig, schema, properties, trusted);
     } catch (Exception e) {
-      ParWork.propagateInterrupt(e);
+      IOUtils.closeQuietly(coreLoader);
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
           "Could not load conf for core " + dcore.getName() +
               ": " + e.getMessage(), e);
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 21444df..767a0c8 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -18,7 +18,6 @@ package org.apache.solr.core;
 
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
-import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.http.auth.AuthSchemeProvider;
 import org.apache.http.client.CredentialsProvider;
@@ -37,8 +36,8 @@ import org.apache.solr.client.solrj.impl.XMLResponseParser;
 import org.apache.solr.client.solrj.io.SolrClientCache;
 import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
 import org.apache.solr.cloud.CloudDescriptor;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.Overseer;
-import org.apache.solr.cloud.OverseerTaskQueue;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.overseer.OverseerAction;
 import org.apache.solr.common.AlreadyClosedException;
@@ -102,6 +101,7 @@ import org.apache.solr.security.SecurityPluginHolder;
 import org.apache.solr.update.SolrCoreState;
 import org.apache.solr.update.UpdateShardHandler;
 import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.SystemIdResolver;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -120,9 +120,11 @@ import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
 import static org.apache.solr.core.CorePropertiesLocator.PROPERTIES_FILENAME;
 import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
 import java.io.Closeable;
+import java.io.File;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.security.spec.InvalidKeySpecException;
@@ -130,6 +132,7 @@ import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
@@ -145,7 +148,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.TimeoutException;
+import java.util.concurrent.locks.ReentrantLock;
 
 /**
  * @since solr 1.3
@@ -155,8 +158,8 @@ public class CoreContainer implements Closeable {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   static {
-    log.warn("expected pre init of xml factories {} {} {} {} {}", XmlConfigFile.xpathFactory,
-        FieldTypeXmlAdapter.dbf, XMLResponseParser.inputFactory, XMLResponseParser.saxFactory, XmlConfigFile.getXpath());
+    log.warn("expected pre init of xml factories {} {} {} {} {}",
+        FieldTypeXmlAdapter.dbf, XMLResponseParser.inputFactory, XMLResponseParser.saxFactory);
   }
 
   final SolrCores solrCores = new SolrCores(this);
@@ -199,6 +202,8 @@ public class CoreContainer implements Closeable {
 
   public volatile ExecutorService solrCoreLoadExecutor;
 
+  public volatile ExecutorService solrCoreCloseExecutor;
+
   private final OrderedExecutor replayUpdatesExecutor;
 
   @SuppressWarnings({"rawtypes"})
@@ -215,7 +220,7 @@ public class CoreContainer implements Closeable {
 
   private final BlobRepository blobRepository = new BlobRepository(this);
 
-  private volatile PluginBag<SolrRequestHandler> containerHandlers = new PluginBag<>(SolrRequestHandler.class, null);
+  private final PluginBag<SolrRequestHandler> containerHandlers = new PluginBag<>(SolrRequestHandler.class, null);
 
   private volatile boolean asyncSolrCoreLoad;
 
@@ -250,6 +255,24 @@ public class CoreContainer implements Closeable {
 
   // private Set<Future> zkRegFutures = zkRegFutures = ConcurrentHashMap.newKeySet();
 
+  private SystemIdResolver sysIdResolver;
+ // public XPathFactoryImpl xpathFactory = new XPathFactoryImpl();
+
+
+  {
+//    PipelineConfiguration plc = XmlConfigFile.conf1.makePipelineConfiguration();
+//
+//    ParseOptions po = plc.getParseOptions();
+//    // if (is.getSystemId() != null) {
+//    po.setEntityResolver(CoreContainer..getSysIdResolver());
+//    // }
+//
+//    po.setXIncludeAware(true);
+//    po.setSchemaValidationMode(0);
+//    po.setExpandAttributeDefaults(true);
+//    //conf.setURIResolver(sysIdResolver.asURIResolver());
+//    xpathFactory.setConfiguration(plc.getConfiguration());
+  }
 
   // Bits for the state variable.
   public final static long LOAD_COMPLETE = 0x1L;
@@ -298,8 +321,8 @@ public class CoreContainer implements Closeable {
    * @param properties substitutable properties (alternative to Sys props)
    * @see #load()
    */
-  public CoreContainer(Path solrHome, Properties properties) {
-    this(SolrXmlConfig.fromSolrHome(solrHome, properties));
+  public CoreContainer(Path solrHome, Properties properties) throws IOException {
+    this(new SolrXmlConfig().fromSolrHome(solrHome, properties));
   }
 
   /**
@@ -338,6 +361,7 @@ public class CoreContainer implements Closeable {
     }
 
     this.loader = config.getSolrResourceLoader();
+
     this.solrHome = config.getSolrHome();
     this.cfg = requireNonNull(config);
 
@@ -355,15 +379,18 @@ public class CoreContainer implements Closeable {
     metricManager = new SolrMetricManager(loader, cfg.getMetricsConfig());
     String registryName = SolrMetricManager.getRegistryName(SolrInfoBean.Group.node);
     solrMetricsContext = new SolrMetricsContext(metricManager, registryName, metricTag);
+
     try (ParWork work = new ParWork(this)) {
 
-      work.collect("", () -> {
-        try {
-          containerHandlers.put(PublicKeyHandler.PATH, new PublicKeyHandler(cfg.getCloudConfig()));
-        } catch (IOException | InvalidKeySpecException e) {
-          throw new RuntimeException("Bad PublicKeyHandler configuration.", e);
-        }
-      });
+      if (Boolean.getBoolean("solr.enablePublicKeyHandler")) {
+        work.collect("", () -> {
+          try {
+            containerHandlers.put(PublicKeyHandler.PATH, new PublicKeyHandler(cfg.getCloudConfig()));
+          } catch (IOException | InvalidKeySpecException e) {
+            throw new RuntimeException("Bad PublicKeyHandler configuration.", e);
+          }
+        });
+      }
 
       work.collect("",() -> {
         updateShardHandler = new UpdateShardHandler(cfg.getUpdateShardHandlerConfig());
@@ -389,8 +416,12 @@ public class CoreContainer implements Closeable {
     containerProperties.putAll(cfg.getSolrProperties());
 
 
-    solrCoreLoadExecutor = new PerThreadExecService(ParWork.getRootSharedExecutor(), Math.max(32, Runtime.getRuntime().availableProcessors()),
+    solrCoreLoadExecutor = new PerThreadExecService(ParWork.getRootSharedExecutor(), Math.max(16, Runtime.getRuntime().availableProcessors()),
         false, false);
+
+    solrCoreCloseExecutor = new PerThreadExecService(ParWork.getRootSharedExecutor(), Math.max(6, Runtime.getRuntime().availableProcessors() / 2),
+        false, false);
+
   }
 
   @SuppressWarnings({"unchecked"})
@@ -585,11 +616,8 @@ public class CoreContainer implements Closeable {
     isZkAware = false;
   }
 
-  public static CoreContainer createAndLoad(Path solrHome) {
-    return createAndLoad(solrHome, solrHome.resolve(SolrXmlConfig.SOLR_XML_FILE));
-  }
 
-  public static CoreContainer createAndLoad(Path solrHome, Path configFile) {
+  public static CoreContainer createAndLoad(Path solrHome, Path configFile) throws IOException {
     return createAndLoad(solrHome, configFile, null);
   }
   /**
@@ -599,8 +627,8 @@ public class CoreContainer implements Closeable {
    * @param configFile the file containing this container's configuration
    * @return a loaded CoreContainer
    */
-  public static CoreContainer createAndLoad(Path solrHome, Path configFile, SolrZkClient zkClient) {
-    NodeConfig config = SolrXmlConfig.fromFile(solrHome, configFile, new Properties());
+  public static CoreContainer createAndLoad(Path solrHome, Path configFile, SolrZkClient zkClient) throws IOException {
+    NodeConfig config = new SolrXmlConfig().fromFile(solrHome, configFile, new Properties());
     CoreContainer cc = new CoreContainer(zkClient, config, new CorePropertiesLocator(config.getCoreRootDirectory()), true);
     try {
       cc.load();
@@ -670,16 +698,6 @@ public class CoreContainer implements Closeable {
 
     loaded = true;
 
-    if (isZooKeeperAware()) {
-      try {
-        zkSys.start(this);
-      } catch (IOException e) {
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
-      } catch (KeeperException e) {
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
-      }
-    }
-
     // Always add $SOLR_HOME/lib to the shared resource loader
     Set<String> libDirs = new LinkedHashSet<>();
     libDirs.add("lib");
@@ -710,96 +728,119 @@ public class CoreContainer implements Closeable {
     containerHandlers.getApiBag().registerObject(packageStoreAPI.readAPI);
     containerHandlers.getApiBag().registerObject(packageStoreAPI.writeAPI);
 
-    solrClientCache = new SolrClientCache(isZooKeeperAware() ? zkSys.getZkController().getZkStateReader() : null, updateShardHandler.getTheSharedHttpClient());
-
     // initialize CalciteSolrDriver instance to use this solrClientCache
     CalciteSolrDriver.INSTANCE.setSolrClientCache(solrClientCache);
 
-    try (ParWork work = new ParWork(this)) {
+    try {
 
-      work.collect("", () -> {
         solrCores.load(loader);
 
         logging = LogWatcher.newRegisteredLogWatcher(cfg.getLogWatcherConfig(), loader);
 
         hostName = cfg.getNodeName();
 
+        collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
+        infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
+        coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
+        configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
+
+        createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
+        createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
+
+
         if (isZooKeeperAware()) {
-          pkiAuthenticationPlugin = new PKIAuthenticationPlugin(this, zkSys.getZkController().getNodeName(), (PublicKeyHandler) containerHandlers.get(PublicKeyHandler.PATH));
-          // use deprecated API for back-compat, remove in 9.0
-          pkiAuthenticationPlugin.initializeMetrics(solrMetricsContext, "/authentication/pki");
-          TracerConfigurator.loadTracer(loader, cfg.getTracerConfiguratorPluginInfo(), getZkController().getZkStateReader());
-          packageLoader = new PackageLoader(this);
-          containerHandlers.getApiBag().registerObject(packageLoader.getPackageAPI().editAPI);
-          containerHandlers.getApiBag().registerObject(packageLoader.getPackageAPI().readAPI);
-          ZookeeperReadAPI zookeeperReadAPI = new ZookeeperReadAPI(this);
-          containerHandlers.getApiBag().registerObject(zookeeperReadAPI);
+          try {
+            zkSys.start(this);
+          } catch (IOException e) {
+            throw new SolrException(ErrorCode.SERVER_ERROR, e);
+          } catch (KeeperException e) {
+            throw new SolrException(ErrorCode.SERVER_ERROR, e);
+          }
         }
-      });
 
-      work.collect("", () -> {
-        MDCLoggingContext.setNode(this);
 
-        securityConfHandler = isZooKeeperAware() ? new SecurityConfHandlerZk(this) : new SecurityConfHandlerLocal(this);
-        reloadSecurityProperties();
-        warnUsersOfInsecureSettings();
-        this.backupRepoFactory = new BackupRepositoryFactory(cfg.getBackupRepositoryPlugins());
-      });
+      try (ParWork work = new ParWork(this, false, true)) {
 
-      work.collect("", () -> {
-        createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
-        createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
-      });
+        boolean enableMetrics = Boolean.getBoolean("solr.enableMetrics");
+        if (enableMetrics) {
+          work.collect("", () -> {
+            // metricsHistoryHandler uses metricsHandler, so create it first
+            metricsHandler = new MetricsHandler(this);
+            containerHandlers.put(METRICS_PATH, metricsHandler);
+            metricsHandler.initializeMetrics(solrMetricsContext, METRICS_PATH);
+          });
 
-      work.collect("", () -> {
-        collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
-        infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
-      });
+          work.collect("", () -> {
+            metricsCollectorHandler = createHandler(MetricsCollectorHandler.HANDLER_PATH, MetricsCollectorHandler.class.getName(), MetricsCollectorHandler.class);
+            // may want to add some configuration here in the future
+            metricsCollectorHandler.init(null);
+          });
 
-      work.collect("", () -> {
-        // metricsHistoryHandler uses metricsHandler, so create it first
-        metricsHandler = new MetricsHandler(this);
-        containerHandlers.put(METRICS_PATH, metricsHandler);
-        metricsHandler.initializeMetrics(solrMetricsContext, METRICS_PATH);
-      });
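+          // the history handler builds on the metrics handler created above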
+          work.collect("", () -> {
+            createMetricsHistoryHandler();
+          });
 
-      work.collect("", () -> {
-        metricsCollectorHandler = createHandler(MetricsCollectorHandler.HANDLER_PATH, MetricsCollectorHandler.class.getName(), MetricsCollectorHandler.class);
-        // may want to add some configuration here in the future
-        metricsCollectorHandler.init(null);
-      });
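+          // run the metrics tasks collected above before the next batch is queued (see ParWork#addCollect)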
+          work.addCollect();
+        }
 
-      work.collect("", () -> {
-        if (securityConfHandler != null) {
-          containerHandlers.put(AUTHZ_PATH, securityConfHandler);
+        work.collect("", () -> {
+          MDCLoggingContext.setNode(this);
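+          // ZK-aware nodes manage security.json in ZooKeeper; standalone nodes read it from the local filesystem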
+          securityConfHandler = isZooKeeperAware() ? new SecurityConfHandlerZk(this) : new SecurityConfHandlerLocal(this);
           securityConfHandler.initializeMetrics(solrMetricsContext, AUTHZ_PATH);
           containerHandlers.put(AUTHC_PATH, securityConfHandler);
+          reloadSecurityProperties();
+          warnUsersOfInsecureSettings();
+          this.backupRepoFactory = new BackupRepositoryFactory(cfg.getBackupRepositoryPlugins());
+        });
+
+        if (isZooKeeperAware()) {
+          work.collect("", () -> {
+            pkiAuthenticationPlugin = new PKIAuthenticationPlugin(this, zkSys.getZkController().getNodeName(), (PublicKeyHandler) containerHandlers.get(PublicKeyHandler.PATH));
+            // use deprecated API for back-compat, remove in 9.0
+            pkiAuthenticationPlugin.initializeMetrics(solrMetricsContext, "/authentication/pki");
+            TracerConfigurator.loadTracer(loader, cfg.getTracerConfiguratorPluginInfo(), getZkController().getZkStateReader());
+            packageLoader = new PackageLoader(this);
+            containerHandlers.getApiBag().registerObject(packageLoader.getPackageAPI().editAPI);
+            containerHandlers.getApiBag().registerObject(packageLoader.getPackageAPI().readAPI);
+            ZookeeperReadAPI zookeeperReadAPI = new ZookeeperReadAPI(this);
+            containerHandlers.getApiBag().registerObject(zookeeperReadAPI);
+          });
         }
-      });
 
-      work.collect("", () -> {
-        PluginInfo[] metricReporters = cfg.getMetricsConfig().getMetricReporters();
-        metricManager.loadReporters(metricReporters, loader, this, null, null, SolrInfoBean.Group.node);
-        metricManager.loadReporters(metricReporters, loader, this, null, null, SolrInfoBean.Group.jvm);
-        metricManager.loadReporters(metricReporters, loader, this, null, null, SolrInfoBean.Group.jetty);
-      });
+        work.addCollect();
 
-      work.addCollect();
 
-      if (!Boolean.getBoolean("solr.disableMetricsHistoryHandler")) {
         work.collect("", () -> {
-          createMetricsHistoryHandler();
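+          // client cache for internal SolrJ use (e.g. streaming expressions), reusing the shared update HTTP client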
+          solrClientCache = new SolrClientCache(isZooKeeperAware() ? zkSys.getZkController().getZkStateReader() : null, updateShardHandler.getTheSharedHttpClient());
         });
-      }
 
-      //  work.addCollect();
-      work.collect("", () -> {
-        coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
-        configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
-      });
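+        // load the JVM and node metric reporter groups as separate parallel tasks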
+        work.collect("", () -> {
+          PluginInfo[] metricReporters = cfg.getMetricsConfig().getMetricReporters();
+          metricManager.loadReporters(metricReporters, loader, this, null, null, SolrInfoBean.Group.jvm);
+        });
+
+        work.collect("", () -> {
+          PluginInfo[] metricReporters = cfg.getMetricsConfig().getMetricReporters();
+          metricManager.loadReporters(metricReporters, loader, this, null, null, SolrInfoBean.Group.node);
+        });
... 33025 lines suppressed ...