You are viewing a plain text version of this content. (The canonical link that followed was a hyperlink and is not preserved in this plain-text copy.)
Posted to commits@lucene.apache.org by yo...@apache.org on 2019/12/13 22:55:03 UTC

[lucene-solr] branch master updated (2feeb88 -> 5f8e65c)

This is an automated email from the ASF dual-hosted git repository.

yonik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from 2feeb88  SOLR-14026: Upgrade Jetty to 9.4.24.v20191120 and dropwizard to 4.1.2, moved to 8.5 in CHANGES.txt
     new fae6aeb  SOLR-13884: add ConcurrentCreateCollectionTest test
     new 73c5352  SOLR-13884: use policies, preferences
     new db65c82  SOLR-13884: detect multiple replicas on single node
     new 5f8e65c  SOLR-14079: fix SPLITSHARD splitByPrefix in async mode

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 solr/CHANGES.txt                                   |   4 +
 .../solr/cloud/api/collections/SplitShardCmd.java  |   3 +-
 .../ConcurrentCreateCollectionTest.java            | 292 +++++++++++++++++++++
 .../cloud/api/collections/SplitByPrefixTest.java   |  19 ++
 4 files changed, 316 insertions(+), 2 deletions(-)
 create mode 100644 solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java


[lucene-solr] 01/04: SOLR-13884: add ConcurrentCreateCollectionTest test

Posted by yo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

yonik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit fae6aebc74291280c8154e15e505de8bdbe0a349
Author: yonik <yo...@apache.org>
AuthorDate: Wed Oct 30 15:23:55 2019 -0400

    SOLR-13884: add ConcurrentCreateCollectionTest test
---
 .../ConcurrentCreateCollectionTest.java            | 152 +++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
new file mode 100644
index 0000000..042e14d
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud.api.collections;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.lucene.util.LuceneTestCase.Nightly;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.cloud.MiniSolrCloudCluster;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.util.IOUtils;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.util.TimeOut;
+import org.apache.zookeeper.KeeperException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
+  
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(2)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+  }
+
+  @Before
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+  }
+
+  @After
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+    cluster.deleteAllCollections();
+  }
+
+
+
+  public void testConcurrentCreatePlacement() throws Exception {
+    final int nThreads = 20;
+    final int createsPerThread = 1;
+    final int repFactor = 1;
+
+    final CloudSolrClient client = cluster.getSolrClient();
+
+    final AtomicInteger collectionNum = new AtomicInteger();
+    Thread[] indexThreads = new Thread[nThreads];
+
+
+    for (int i=0; i<nThreads; i++) {
+      indexThreads[i] = new Thread(() -> {
+        try {
+          for (int j=0; j<createsPerThread; j++) {
+            int num = collectionNum.incrementAndGet();
+            String collectionName = "collection" + num;
+            CollectionAdminRequest
+                .createCollection("collection" + num, "conf", 1, repFactor)
+                .setMaxShardsPerNode(1)
+                .process(client);
+            cluster.waitForActiveCollection(collectionName, 1, repFactor);
+            // Thread.sleep(5000);
+          }
+        } catch (Exception e) {
+          fail(e.getMessage());
+        }
+      });
+    }
+
+    for (Thread thread : indexThreads) {
+      thread.start();
+    }
+
+    for (Thread thread : indexThreads) {
+      thread.join();
+    }
+
+    Map<String,List<Replica>> map = new HashMap<>();
+    ClusterState cstate = client.getZkStateReader().getClusterState();
+    for (DocCollection collection : cstate.getCollectionsMap().values()) {
+      for (Replica replica : collection.getReplicas()) {
+        String url = replica.getBaseUrl();
+        List<Replica> replicas = map.get(url);
+        if (replicas == null) {
+          replicas = new ArrayList<>();
+          map.put(url, replicas);
+        }
+        replicas.add(replica);
+      }
+    }
+
+    // check if nodes are balanced
+    List<Replica> prev = null;
+    for (List<Replica> replicas : map.values()) {
+      if (prev != null && prev.size() != replicas.size()) {
+        log.error("UNBALANCED CLUSTER: prev node replica count=" + prev.size() + " current=" + replicas.size() + "\n" + cstate.getCollectionsMap());
+        log.error("Replica lists per node: " + map);
+        assertEquals(prev.size(), replicas.size());
+      }
+      prev = replicas;
+    }
+
+  }
+
+
+  
+}


[lucene-solr] 03/04: SOLR-13884: detect multiple replicas on single node

Posted by yo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

yonik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit db65c82c3914accd8117701b5d4c164a5b3cafc5
Author: yonik <yo...@apache.org>
AuthorDate: Mon Nov 4 11:24:36 2019 -0500

    SOLR-13884: detect multiple replicas on single node
---
 .../ConcurrentCreateCollectionTest.java            | 160 +++++++++++++++++----
 1 file changed, 130 insertions(+), 30 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
index 8842b3c..6d103f1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
@@ -16,48 +16,31 @@
  */
 package org.apache.solr.cloud.api.collections;
 
-import java.io.IOException;
 import java.lang.invoke.MethodHandles;
-import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
 
-import org.apache.lucene.util.LuceneTestCase.Nightly;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.client.solrj.request.UpdateRequest;
-import org.apache.solr.client.solrj.response.CollectionAdminResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
 import org.apache.solr.cloud.CloudTestUtils;
-import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.cloud.SolrCloudTestCase;
-import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.util.TimeOut;
-import org.apache.zookeeper.KeeperException;
+import org.apache.solr.common.cloud.Slice;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+
+
 public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
   
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -86,22 +69,94 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
   }
 
 
+  private CollectionAdminRequest.Create createCollectionRequest(String cname, int numShards, int numReplicas) throws Exception {
+    CollectionAdminRequest.Create creq = CollectionAdminRequest
+        // nocommit .createCollection(cname, "conf", NODES - 1, NODES - 1)
+        .createCollection(cname, "conf", numShards, numReplicas)
+        .setMaxShardsPerNode(100);
+    creq.setWaitForFinalState(true);
+    creq.setAutoAddReplicas(true);
+    return creq;
+  }
 
   public void testConcurrentCreatePlacement() throws Exception {
-    final int nThreads = 20;
+    final int nThreads = 2;
     final int createsPerThread = 1;
-    final int repFactor = 1;
-    final boolean useClusterPolicy = true;
-    final boolean useCollectionPolicy = false;
+    final int nShards = 1;
+    final int repFactor = 2;
+    final boolean useClusterPolicy = false;
+    final boolean useCollectionPolicy = true;
+    final boolean startUnbalanced = true; // can help make a smaller test that can still reproduce an issue.
+    final int unbalancedSize = 1; // the number of replicas to create first
+    final boolean stopNode = false;  // only applicable when startUnbalanced==true... stops a node during first collection creation, then restarts
 
     final CloudSolrClient client = cluster.getSolrClient();
 
 
+    if (startUnbalanced) {
+      /*** This produces a failure (multiple replicas of single shard on same node) when run with NODES=4 and
+       final int nThreads = 2;
+       final int createsPerThread = 1;
+       final int nShards = 2;
+       final int repFactor = 2;
+       final boolean useClusterPolicy = false;
+       final boolean useCollectionPolicy = true;
+       final boolean startUnbalanced = true;
+       // NOTE: useClusterPolicy=true seems to fix it! So does putting both creates in a single thread!
+       // NOTE: even creating a single replica to start with causes failure later on.
+
+       Also reproduced with smaller cluster: NODES=2 and
+       final int nThreads = 2;
+       final int createsPerThread = 1;
+       final int nShards = 1;
+       final int repFactor = 2;
+       final boolean useClusterPolicy = false;
+       final boolean useCollectionPolicy = true;
+       final boolean startUnbalanced = true;
+
+       Also, with NODES=3:
+       final int nThreads = 2;
+       final int createsPerThread = 1;
+       final int nShards = 1;
+       final int repFactor = 2;
+       final boolean useClusterPolicy = false;
+       final boolean useCollectionPolicy = true;
+       final boolean startUnbalanced = false;
+
+       // Also succeeded in replicating a bug where all 5 replicas were on a single node: CORES=5, nThreads=5, repFactor=5,
+       //    unbalancedSize = 16 (4 replicas on each of the up nodes), stopNode=true
+       ***/
+
+
+      JettySolrRunner downJetty = cluster.getJettySolrRunners().get(0);
+      if (stopNode) {
+        cluster.stopJettySolrRunner(downJetty);
+      }
+
+      String cname = "STARTCOLLECTION";
+      CollectionAdminRequest.Create creq = CollectionAdminRequest
+          // nocommit .createCollection(cname, "conf", NODES - 1, NODES - 1)
+          .createCollection(cname, "conf", unbalancedSize, 1)
+          .setMaxShardsPerNode(100);
+      creq.setWaitForFinalState(true);
+      // creq.setAutoAddReplicas(true);
+      if (useCollectionPolicy) { creq.setPolicy("policy1"); }
+      creq.process(client);
+
+      if (stopNode) {
+        // this will start it with a new port.... does it matter?
+        cluster.startJettySolrRunner(downJetty);
+      }
+    }
+
+
+
     if (useClusterPolicy) {
       String setClusterPolicyCommand = "{" +
           " 'set-cluster-policy': [" +
           // "      {'cores':'<100', 'node':'#ANY'}," +
           "      {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}," +
+          // "      {'replica':'<2', 'node': '#ANY'}," +
           "    ]" +
           "}";
 
@@ -110,12 +165,36 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
     }
 
     if (useCollectionPolicy) {
-      // NOTE: the meer act of setting this named policy prevents LegacyAssignStrategy from being used, even if the policy is
+      // NOTE: the mere act of setting this named policy prevents LegacyAssignStrategy from being used, even if the policy is
       // not used during collection creation.
-      String commands =  "{set-policy :{policy1 : [{replica:'<2' , node:'#ANY'}]}}";
+      String commands =  "{set-policy : {" +
+          " policy1 : [{replica:'<2' , node:'#ANY'}]" +
+          ",policy2 : [{replica:'<2' , shard:'#EACH', node:'#ANY'}]" +
+          "}}";
       client.request(CloudTestUtils.AutoScalingRequest.create(SolrRequest.METHOD.POST, commands));
+
+      /*** take defaults for cluster preferences
+      String cmd = "{" +
+          " 'set-cluster-preferences': [" +
+          // "      {'cores':'<100', 'node':'#ANY'}," +
+          "      {minimize:cores}" +
+          "    ]" +
+          "}";
+
+      SolrRequest req = CloudTestUtils.AutoScalingRequest.create(SolrRequest.METHOD.POST, cmd);
+      client.request(req);
+       ***/
     }
 
+    /***
+    SolrRequest req = CloudTestUtils.AutoScalingRequest.create(SolrRequest.METHOD.GET, null);
+    SolrResponse response = req.process(client);
+    log.info("######### AUTOSCALE " + response);
+     ***/
+
+
+    byte[] data = client.getZkStateReader().getZkClient().getData("/autoscaling.json", null, null, true);
+    log.info("AUTOSCALE DATA: " + new String(data, "UTF-8"));
 
     final AtomicInteger collectionNum = new AtomicInteger();
     Thread[] indexThreads = new Thread[nThreads];
@@ -125,14 +204,17 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
         try {
           for (int j=0; j<createsPerThread; j++) {
             int num = collectionNum.incrementAndGet();
+            // Thread.sleep(num*1000); // nocommit
             String collectionName = "collection" + num;
             CollectionAdminRequest.Create createReq = CollectionAdminRequest
-                .createCollection(collectionName, "conf", 1, repFactor)
-                .setMaxShardsPerNode(1);
+                .createCollection(collectionName, "conf", nShards, repFactor)
+                // .setMaxShardsPerNode(1) // should be default
+                ;
             createReq.setWaitForFinalState(false);
             if (useCollectionPolicy) {
               createReq.setPolicy("policy1");
             }
+            createReq.setAutoAddReplicas(true);
 
             createReq.process(client);
             // cluster.waitForActiveCollection(collectionName, 1, repFactor);
@@ -152,8 +234,9 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
       thread.join();
     }
 
-    int expectedTotalReplicas = nThreads * createsPerThread * repFactor;
+    int expectedTotalReplicas = unbalancedSize + nThreads * createsPerThread * nShards * repFactor;
     int expectedPerNode = expectedTotalReplicas / NODES;
+    boolean expectBalanced = (expectedPerNode * NODES == expectedTotalReplicas);
 
     Map<String,List<Replica>> replicaMap = new HashMap<>();
     ClusterState cstate = client.getZkStateReader().getClusterState();
@@ -173,11 +256,28 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
     boolean failed = false;
     for (List<Replica> replicas : replicaMap.values()) {
       if (replicas.size() != expectedPerNode ) {
-        failed = true;
+        if (expectBalanced) {
+          failed = true;
+        }
         log.error("UNBALANCED CLUSTER: expected replicas per node " + expectedPerNode +  " but got " + replicas.size());
       }
     }
 
+    // check if there were multiple replicas of the same shard placed on the same node
+    for (DocCollection collection : cstate.getCollectionsMap().values()) {
+      for (Slice slice : collection.getSlices()) {
+        Map<String, Replica> nodeToReplica = new HashMap<>();
+        for (Replica replica : slice.getReplicas()) {
+          Replica prev = nodeToReplica.put(replica.getBaseUrl(), replica);
+          if (prev != null) {
+            failed = true;
+            // NOTE: with a replication factor > 2, this will print multiple times per bad slice.
+            log.error("MULTIPLE REPLICAS OF SINGLE SHARD ON SAME NODE: r1=" + prev + " r2=" + replica);
+          }
+        }
+      }
+    }
+
     if (failed) {
       log.error("Cluster state " + cstate.getCollectionsMap());
     }


[lucene-solr] 02/04: SOLR-13884: use policies, preferences

Posted by yo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

yonik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 73c535261c53d61742516e633edea0036b0f3bf0
Author: yonik <yo...@apache.org>
AuthorDate: Thu Oct 31 13:13:22 2019 -0400

    SOLR-13884: use policies, preferences
---
 .../ConcurrentCreateCollectionTest.java            | 78 ++++++++++++++++------
 1 file changed, 59 insertions(+), 19 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
index 042e14d..8842b3c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/ConcurrentCreateCollectionTest.java
@@ -33,12 +33,14 @@ import org.apache.lucene.util.LuceneTestCase.Nightly;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrDocument;
@@ -60,10 +62,13 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
   
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
+  static int NODES = 2;
+
   @BeforeClass
   public static void setupCluster() throws Exception {
-    configureCluster(2)
-        .addConfig("conf", configset("cloud-minimal"))
+    configureCluster(NODES)
+        // .addConfig("conf", configset("cloud-minimal"))
+        .addConfig("conf", configset("_default"))
         .configure();
   }
 
@@ -86,25 +91,52 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
     final int nThreads = 20;
     final int createsPerThread = 1;
     final int repFactor = 1;
+    final boolean useClusterPolicy = true;
+    final boolean useCollectionPolicy = false;
 
     final CloudSolrClient client = cluster.getSolrClient();
 
+
+    if (useClusterPolicy) {
+      String setClusterPolicyCommand = "{" +
+          " 'set-cluster-policy': [" +
+          // "      {'cores':'<100', 'node':'#ANY'}," +
+          "      {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}," +
+          "    ]" +
+          "}";
+
+      SolrRequest req = CloudTestUtils.AutoScalingRequest.create(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+      client.request(req);
+    }
+
+    if (useCollectionPolicy) {
+      // NOTE: the meer act of setting this named policy prevents LegacyAssignStrategy from being used, even if the policy is
+      // not used during collection creation.
+      String commands =  "{set-policy :{policy1 : [{replica:'<2' , node:'#ANY'}]}}";
+      client.request(CloudTestUtils.AutoScalingRequest.create(SolrRequest.METHOD.POST, commands));
+    }
+
+
     final AtomicInteger collectionNum = new AtomicInteger();
     Thread[] indexThreads = new Thread[nThreads];
 
-
     for (int i=0; i<nThreads; i++) {
       indexThreads[i] = new Thread(() -> {
         try {
           for (int j=0; j<createsPerThread; j++) {
             int num = collectionNum.incrementAndGet();
             String collectionName = "collection" + num;
-            CollectionAdminRequest
-                .createCollection("collection" + num, "conf", 1, repFactor)
-                .setMaxShardsPerNode(1)
-                .process(client);
-            cluster.waitForActiveCollection(collectionName, 1, repFactor);
-            // Thread.sleep(5000);
+            CollectionAdminRequest.Create createReq = CollectionAdminRequest
+                .createCollection(collectionName, "conf", 1, repFactor)
+                .setMaxShardsPerNode(1);
+            createReq.setWaitForFinalState(false);
+            if (useCollectionPolicy) {
+              createReq.setPolicy("policy1");
+            }
+
+            createReq.process(client);
+            // cluster.waitForActiveCollection(collectionName, 1, repFactor);
+            // Thread.sleep(10000);
           }
         } catch (Exception e) {
           fail(e.getMessage());
@@ -120,31 +152,39 @@ public class ConcurrentCreateCollectionTest extends SolrCloudTestCase {
       thread.join();
     }
 
-    Map<String,List<Replica>> map = new HashMap<>();
+    int expectedTotalReplicas = nThreads * createsPerThread * repFactor;
+    int expectedPerNode = expectedTotalReplicas / NODES;
+
+    Map<String,List<Replica>> replicaMap = new HashMap<>();
     ClusterState cstate = client.getZkStateReader().getClusterState();
     for (DocCollection collection : cstate.getCollectionsMap().values()) {
       for (Replica replica : collection.getReplicas()) {
         String url = replica.getBaseUrl();
-        List<Replica> replicas = map.get(url);
+        List<Replica> replicas = replicaMap.get(url);
         if (replicas == null) {
           replicas = new ArrayList<>();
-          map.put(url, replicas);
+          replicaMap.put(url, replicas);
         }
         replicas.add(replica);
       }
     }
 
     // check if nodes are balanced
-    List<Replica> prev = null;
-    for (List<Replica> replicas : map.values()) {
-      if (prev != null && prev.size() != replicas.size()) {
-        log.error("UNBALANCED CLUSTER: prev node replica count=" + prev.size() + " current=" + replicas.size() + "\n" + cstate.getCollectionsMap());
-        log.error("Replica lists per node: " + map);
-        assertEquals(prev.size(), replicas.size());
+    boolean failed = false;
+    for (List<Replica> replicas : replicaMap.values()) {
+      if (replicas.size() != expectedPerNode ) {
+        failed = true;
+        log.error("UNBALANCED CLUSTER: expected replicas per node " + expectedPerNode +  " but got " + replicas.size());
       }
-      prev = replicas;
     }
 
+    if (failed) {
+      log.error("Cluster state " + cstate.getCollectionsMap());
+    }
+
+    assertEquals(replicaMap.size(),  NODES);  // make sure something was created
+
+    assertTrue(!failed);
   }
 
 


[lucene-solr] 04/04: SOLR-14079: fix SPLITSHARD splitByPrefix in async mode

Posted by yo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

yonik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 5f8e65c58f5849302d3aaacd65eea46eab258a57
Author: yonik <yo...@apache.org>
AuthorDate: Fri Dec 13 17:55:05 2019 -0500

    SOLR-14079: fix SPLITSHARD splitByPrefix in async mode
---
 solr/CHANGES.txt                                      |  4 ++++
 .../solr/cloud/api/collections/SplitShardCmd.java     |  3 +--
 .../solr/cloud/api/collections/SplitByPrefixTest.java | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d03fe06..05ddf48 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -315,6 +315,10 @@ Bug Fixes
 
 * SOLR-14013: FIX: javabin performance regressions (noble, yonik, Houston Putman)
 
+* SOLR-14079: SPLITSHARD splitByPrefix doesn't work in async mode.  This also
+  affects splits triggered by the autoscale framework, which use async mode.
+  (Megan Carey, Andy Vuong, Bilal Waheed, Ilan Ginzburg, yonik)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index 333051a..26b1f0d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -223,12 +223,11 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         // params.set(NUM_SUB_SHARDS, Integer.toString(numSubShards));
 
         {
-          final ShardRequestTracker shardRequestTracker = ocmh.asyncRequestTracker(asyncId);
+          final ShardRequestTracker shardRequestTracker = ocmh.syncRequestTracker();
           shardRequestTracker.sendShardRequest(parentShardLeader.getNodeName(), params, shardHandler);
           SimpleOrderedMap<Object> getRangesResults = new SimpleOrderedMap<>();
           String msgOnError = "SPLITSHARD failed to invoke SPLIT.getRanges core admin command";
           shardRequestTracker.processResponses(getRangesResults, shardHandler, true, msgOnError);
-          handleFailureOnAsyncRequest(results, msgOnError);
 
           // Extract the recommended splits from the shard response (if it exists)
           // example response: getRangesResults={success={127.0.0.1:62086_solr={responseHeader={status=0,QTime=1},ranges=10-20,3a-3f}}}
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
index 5ec53e5..cca4fe4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
@@ -148,6 +148,10 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
 
   @Test
   public void doTest() throws IOException, SolrServerException {
+    // SPLITSHARD is recommended to be run in async mode, so we default to that.
+    // Also, autoscale triggers use async with splits as well.
+    boolean doAsync = true;
+
     CollectionAdminRequest
         .createCollection(COLLECTION_NAME, "conf", 1, 1)
         .setMaxShardsPerNode(100)
@@ -165,6 +169,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
         .setNumSubShards(2)
         .setSplitByPrefix(true)
         .setShardName("shard1");
+    if (doAsync) {
+      splitShard.setAsyncId("SPLIT1");
+    }
     splitShard.process(client);
     waitForState("Timed out waiting for sub shards to be active.",
         COLLECTION_NAME, activeClusterShape(2, 3));  // expectedReplicas==3 because original replica still exists (just inactive)
@@ -187,6 +194,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
     splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
         .setSplitByPrefix(true)
         .setShardName("shard1_1");  // should start out with the range of 0-7fffffff
+    if (doAsync) {
+      splitShard.setAsyncId("SPLIT2");
+    }
     splitShard.process(client);
     waitForState("Timed out waiting for sub shards to be active.",
         COLLECTION_NAME, activeClusterShape(3, 5));
@@ -216,6 +226,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
     splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
         .setSplitByPrefix(true)
         .setShardName(slice1.getName());
+    if (doAsync) {
+      splitShard.setAsyncId("SPLIT3");
+    }
     splitShard.process(client);
     waitForState("Timed out waiting for sub shards to be active.",
         COLLECTION_NAME, activeClusterShape(4, 7));
@@ -236,6 +249,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
     splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
         .setSplitByPrefix(true)
         .setShardName(slice1.getName());
+    if (doAsync) {
+      splitShard.setAsyncId("SPLIT4");
+    }
     splitShard.process(client);
     waitForState("Timed out waiting for sub shards to be active.",
         COLLECTION_NAME, activeClusterShape(5, 9));
@@ -252,6 +268,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
     splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
         .setSplitByPrefix(true)
         .setShardName(slice1.getName());
+    if (doAsync) {
+      splitShard.setAsyncId("SPLIT5");
+    }
     splitShard.process(client);
     waitForState("Timed out waiting for sub shards to be active.",
         COLLECTION_NAME, activeClusterShape(6, 11));