You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by il...@apache.org on 2020/11/27 00:11:27 UTC

[lucene-solr] branch jira/solr-15004 updated: Added failing test AffinityPlacementFactoryTest.testPlacementWithExistingReplicas. There's a bug in AffinityPlacementPlugin

This is an automated email from the ASF dual-hosted git repository.

ilan pushed a commit to branch jira/solr-15004
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/solr-15004 by this push:
     new d5325ac  Added failing test AffinityPlacementFactoryTest.testPlacementWithExistingReplicas. There's a bug in AffinityPlacementPlugin
d5325ac is described below

commit d5325ac7d3e8262374b92a965713c6f91d905c71
Author: Ilan Ginzburg <ig...@salesforce.com>
AuthorDate: Fri Nov 27 01:10:05 2020 +0100

    Added failing test AffinityPlacementFactoryTest.testPlacementWithExistingReplicas. There's a bug in AffinityPlacementPlugin
---
 .../plugins/AffinityPlacementFactory.java          |   2 +-
 .../apache/solr/cluster/placement/Builders.java    |  19 +++
 .../plugins/AffinityPlacementFactoryTest.java      | 170 +++++++++++++++++++--
 3 files changed, 179 insertions(+), 12 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java b/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java
index 379e06c..16c7b41 100644
--- a/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java
+++ b/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java
@@ -371,7 +371,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory {
       // Build the set of candidate nodes, i.e. nodes not having (yet) a replica of the given shard
       Set<Node> candidateNodes = new HashSet<>(replicaTypeToNodes.get(replicaType));
 
-      // Count existing replicas per AZ. We count only instances the type of replica for which we need to do placement. This
+      // Count existing replicas per AZ. We count only instances of the type of replica for which we need to do placement. This
       // can be changed in the loop below if we want to count all replicas for the shard.
       Map<String, Integer> azToNumReplicas = new HashMap<>();
       // Add all "interesting" AZ's, i.e. AZ's for which there's a chance we can do placement.
diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java b/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java
index f10135a..0c3d6e2 100644
--- a/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java
+++ b/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java
@@ -119,6 +119,13 @@ public class Builders {
     }
 
     /**
+     * @return The internal shards data structure to allow test code to modify the replica distribution to nodes.
+     */
+    public LinkedList<ShardBuilder> getShardBuilders() {
+      return shardBuilders;
+    }
+
+    /**
      * Initializes shard and replica builders for the collection based on passed parameters. Replicas are assigned round
      * robin to the nodes. The shard leader is the first NRT replica of each shard (or first TLOG is no NRT).
      * Shard and replica configuration can be modified afterwards, the returned builder hierarchy is a convenient starting point.
@@ -199,6 +206,14 @@ public class Builders {
       return this;
     }
 
+    public String getShardName() {
+      return shardName;
+    }
+
+    public LinkedList<ReplicaBuilder> getReplicaBuilders() {
+      return replicaBuilders;
+    }
+
     public ShardBuilder setReplicaBuilders(LinkedList<ReplicaBuilder> replicaBuilders) {
       this.replicaBuilders = replicaBuilders;
       return this;
@@ -246,6 +261,10 @@ public class Builders {
       return this;
     }
 
+    public Replica.ReplicaType getReplicaType() {
+      return replicaType;
+    }
+
     public ReplicaBuilder setReplicaType(Replica.ReplicaType replicaType) {
       this.replicaType = replicaType;
       return this;
diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java b/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java
index e2d6ff7..8d8937a 100644
--- a/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java
+++ b/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java
@@ -76,7 +76,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
    * When it places a replica for an existing collection, it should pick the node with more cores that doesn't already have a replica for the shard.
    */
   private void testBasicPlacementInternal(boolean hasExistingCollection) throws Exception {
-    String collectionName = "testCollection";
+    String collectionName = "basicCollection";
 
     Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeNodes(2);
     LinkedList<Builders.NodeBuilder> nodeBuilders = clusterBuilder.getNodeBuilders();
@@ -117,7 +117,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
    */
   @Test
   public void testLowSpaceNode() throws Exception {
-    String collectionName = "testCollection";
+    String collectionName = "lowSpaceCollection";
 
     final int LOW_SPACE_NODE_INDEX = 0;
     final int NO_SPACE_NODE_INDEX = 1;
@@ -179,9 +179,161 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
     }
   }
 
+  /**
+   * Test that existing collection replicas are taken into account to not place more than one replica of a given type
+   * for a given shard per node
+   */
+  @Test
+  public void testPlacementWithExistingReplicas() throws Exception {
+    String collectionName = "existingCollection";
+
+    final int LOW_SPACE_NODE_INDEX = 0;
+    final int NO_SPACE_NODE_INDEX = 1;
+
+
+    // Cluster nodes and their attributes
+    Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeNodes(4);
+    LinkedList<Builders.NodeBuilder> nodeBuilders = clusterBuilder.getNodeBuilders();
+    int coresOnNode = 10;
+    for (Builders.NodeBuilder nodeBuilder : nodeBuilders) {
+      nodeBuilder.setCoreCount(coresOnNode).setFreeDiskGB(PRIORITIZED_FREE_DISK_GB + 1);
+      coresOnNode += 10;
+    }
+
+    // The collection already exists with shards and replicas
+    Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(collectionName);
+    // Build the collection letting the code pick up nodes...
+    collectionBuilder.initializeShardsReplicas(2, 2, 1, 0, nodeBuilders);
+    // Now explicitly change the nodes to create the collection distribution we want to test:
+    // (note this collection is an illegal placement: shard 1 has two replicas on node 0. The placement plugin should still
+    // be able to place new replicas as long as they don't break the rules).
+    //  +--------------+----+----+----+----+
+    //  |         Node |  0 |  1 |  2 |  3 |
+    //  |Cores on node | 10 | 20 | 30 | 40 |
+    //  +----------------------------------+
+    //  |   Shard 1:   |    |    |    |    |
+    //  |         NRT  |  X |    |    |  X |
+    //  |         TLOG |  X |    |    |    |
+    //  +----------------------------------+
+    //  |   Shard 2:   |    |    |    |    |
+    //  |         NRT  |    |  X |    |  X |
+    //  |         TLOG |    |    |  X |    |
+    //  +--------------+----+----+----+----+
+
+    // The code below is not ideal... We only modify the parts of the collection that we need to change (replica nodes).
+    // If this only happens in this test then it is likely the simplest approach.
+    // If we need to do this elsewhere, we need to allow the collection builder to accept a sharding/replication
+    // description and place the replicas accordingly on nodes.
+    List<Builders.ShardBuilder> shardBuilders = collectionBuilder.getShardBuilders();
+    List<Builders.ReplicaBuilder> replicas;
+    Builders.ReplicaBuilder replica;
+
+    // Replicas of shard 1
+    replicas = shardBuilders.get(1 - 1).getReplicaBuilders();
+
+    // Shard 1 first NRT goes to node 0
+    replica = replicas.get(0);
+    assertEquals(Replica.ReplicaType.NRT, replica.getReplicaType());
+    replica.setReplicaNode(nodeBuilders.get(0));
+
+    // Shard 1 second NRT goes to node 3
+    replica = replicas.get(1);
+    assertEquals(Replica.ReplicaType.NRT, replica.getReplicaType());
+    replica.setReplicaNode(nodeBuilders.get(3));
+
+    // Shard 1 TLOG goes to node 0
+    replica = replicas.get(2);
+    assertEquals(Replica.ReplicaType.TLOG, replica.getReplicaType());
+    replica.setReplicaNode(nodeBuilders.get(0));
+
+    // Replicas of shard 2
+    replicas = shardBuilders.get(2 - 1).getReplicaBuilders();
+
+    // Shard 2 first NRT goes to node 1
+    replica = replicas.get(0);
+    assertEquals(Replica.ReplicaType.NRT, replica.getReplicaType());
+    replica.setReplicaNode(nodeBuilders.get(1));
+
+    // Shard 2 second NRT goes to node 3
+    replica = replicas.get(1);
+    assertEquals(Replica.ReplicaType.NRT, replica.getReplicaType());
+    replica.setReplicaNode(nodeBuilders.get(3));
+
+    // Shard 2 TLOG goes to node 2
+    replica = replicas.get(2);
+    assertEquals(Replica.ReplicaType.TLOG, replica.getReplicaType());
+    replica.setReplicaNode(nodeBuilders.get(2));
+
+    SolrCollection solrCollection = collectionBuilder.build();
+
+
+
+    List<Node> liveNodes = clusterBuilder.buildLiveNodes();
+
+    // We now request placement of 1 NRT and 1 TLOG for each shard. They must be placed on the most appropriate nodes,
+    // i.e. those that do not already have a replica for the shard and then on the node with the lowest
+    // number of cores. NRT are placed first.
+    // We therefore expect the placement of the new replicas to look like:
+    //  +--------------+----+----+----+----+
+    //  |         Node |  0 |  1 |  2 |  3 |
+    //  |Cores on node | 10 | 20 | 30 | 40 |
+    //  +----------------------------------+
+    //  |   Shard 1:   |    |    |    |    |
+    //  |         NRT  |  X |  N |    |  X |
+    //  |         TLOG |  X |    |  N |    |
+    //  +----------------------------------+
+    //  |   Shard 2:   |    |    |    |    |
+    //  |         NRT  |  N |  X |    |  X |
+    //  |         TLOG |    |  N |  X |    | <-- We don't really expect this. It should be impossible to place this TLOG with 4 nodes
+    //  +--------------+----+----+----+----+
+
+
+    // Place two replicas of each type for each shard
+    PlacementRequestImpl placementRequest = new PlacementRequestImpl(solrCollection, solrCollection.getShardNames(), new HashSet<>(liveNodes),
+        1, 1, 0);
+
+    PlacementPlan pp = plugin.computePlacement(clusterBuilder.build(), placementRequest, clusterBuilder.buildAttributeFetcher(), new PlacementPlanFactoryImpl());
+
+    // Let's check that the new replicas are placed where expected.
+    Set<ReplicaPlacement> replicaPlacements = pp.getReplicaPlacements();
+
+    // Each expected placement is represented as a string "shard replica-type node"
+    Set<String> expectedPlacements = Set.of("1 NRT 1", "1 TLOG 2", "2 NRT 0", "2 TLOG 1");
+    verifyPlacements(expectedPlacements, replicaPlacements, shardBuilders, liveNodes);
+  }
+
+
+  /**
+   * Verifies that a computed set of placements does match the expected placement on nodes.
+   * @param expectedPlacements a set of strings of the form {@code "1 NRT 3"} where 1 would be the shard index, NRT the
+   *                           replica type and 3 the node on which the replica is placed. Shards are 1 based. Nodes 0 based.
+   */
+  private static void verifyPlacements(Set<String> expectedPlacements, Set<ReplicaPlacement> computedPlacements,
+                                       List<Builders.ShardBuilder> shardBuilders, List<Node> liveNodes) {
+    assertEquals("Wrong number of computed placements", expectedPlacements.size(), computedPlacements.size());
+
+    // Prepare structures for looking up shard name index and node index
+    Map<String, Integer> shardNumbering = new HashMap<>();
+    int index = 1; // first shard is 1 not 0
+    for (Builders.ShardBuilder sb : shardBuilders) {
+      shardNumbering.put(sb.getShardName(), index++);
+    }
+    Map<Node, Integer> nodeNumbering = new HashMap<>();
+    index = 0;
+    for (Node n : liveNodes) {
+      nodeNumbering.put(n, index++);
+    }
+
+    Set<String> expected = new HashSet<>(expectedPlacements);
+    for (ReplicaPlacement p : computedPlacements) {
+      String lookUpPlacementResult = shardNumbering.get(p.getShardName()) + " " + p.getReplicaType().name() + " " +  nodeNumbering.get(p.getNode());
+      assertTrue(expected.remove(lookUpPlacementResult));
+    }
+  }
+
   @Test
   public void testAvailabilityZones() throws Exception {
-    String collectionName = "testCollection";
+    String collectionName = "azCollection";
     int NUM_NODES = 6;
     Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeNodes(NUM_NODES);
     for (int i = 0; i < NUM_NODES; i++) {
@@ -239,7 +391,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
 
   @Test
   public void testReplicaType() throws Exception {
-    String collectionName = "testCollection";
+    String collectionName = "replicaTypeCollection";
     int NUM_NODES = 6;
     Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeNodes(NUM_NODES);
     for (int i = 0; i < NUM_NODES; i++) {
@@ -306,7 +458,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
 
   @Test
   public void testFreeDiskConstraints() throws Exception {
-    String collectionName = "testCollection";
+    String collectionName = "freeDiskCollection";
     int NUM_NODES = 3;
     Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeNodes(NUM_NODES);
     Node smallNode = null;
@@ -344,8 +496,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
     }
   }
 
-  @Test
-  //@Ignore
+  @Test @Slow
   public void testScalability() throws Exception {
     log.info("==== numNodes ====");
     runTestScalability(1000, 100, 40, 40, 20);
@@ -367,10 +518,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
     runTestScalability(5000, 100, 2000, 0, 0);
   }
 
-  private void runTestScalability(int numNodes, int numShards,
-                                  int nrtReplicas, int tlogReplicas,
-                                  int pullReplicas) throws Exception {
-
+  private void runTestScalability(int numNodes, int numShards, int nrtReplicas, int tlogReplicas, int pullReplicas) throws Exception {
     String collectionName = "scaleCollection";
 
     Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeNodes(numNodes);