You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2020/07/26 23:22:44 UTC

[lucene-solr] branch master updated: SOLR-11868: CloudSolrClient.setIdField is confusing, it's really the routing field. Should be deprecated.

This is an automated email from the ASF dual-hosted git repository.

erick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 6bf5f4a  SOLR-11868: CloudSolrClient.setIdField is confusing, it's really the routing field. Should be deprecated.
6bf5f4a is described below

commit 6bf5f4a87f40cd9afd3b9104423d0fe51f287259
Author: Erick Erickson <Er...@gmail.com>
AuthorDate: Sun Jul 26 18:18:05 2020 -0400

    SOLR-11868: CloudSolrClient.setIdField is confusing, it's really the routing field. Should be deprecated.
---
 solr/CHANGES.txt                                   |   2 +
 .../test/org/apache/solr/cloud/RouteFieldTest.java | 163 +++++++++++++++++++++
 .../client/solrj/impl/BaseCloudSolrClient.java     |  37 +++--
 3 files changed, 191 insertions(+), 11 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 18b815a..30e76e8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -162,6 +162,8 @@ Other Changes
 
 * SOLR-14676: Update commons-collections to 4.4 and use it in Solr (Erick Erickson)
 
+* SOLR-11868: Deprecate CloudSolrClient.setIdField, use information from Zookeeper (Erick Erickson)
+
 ==================  8.6.0 ==================
 
 Consult the lucene/CHANGES.txt file for additional, low level, changes in this release.
diff --git a/solr/core/src/test/org/apache/solr/cloud/RouteFieldTest.java b/solr/core/src/test/org/apache/solr/cloud/RouteFieldTest.java
new file mode 100644
index 0000000..4a30af1
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/RouteFieldTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud;
+
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.schema.SchemaRequest;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.solr.client.solrj.response.schema.SchemaResponse.*;
+import static org.apache.solr.common.util.Utils.makeMap;
+
+public class RouteFieldTest extends SolrCloudTestCase {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final String ROUTE_FIELD = "route_field";
+  private static final String COLL_ROUTE = "routeFieldTest";
+  private static final String COLL_ID = "routeIdTest";
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    System.setProperty("managed.schema.mutable", "true");
+    configureCluster(1)
+        .addConfig("conf", configset("cloud-managed"))
+        .configure();
+  }
+
+  // Test for seeing if we actually respect the route field
+  // We should be able to create two collections, one using ID and one using
+  // a route field. We should then be able to index docs to each, using
+  // the route field and id field and find the same docs on the same shards.
+  @Test
+  public void routeFieldTest() throws Exception {
+    log.info("Starting routeFieldTest");
+
+    assertEquals("Failed to  create collection routeFieldTest",
+        0,
+        CollectionAdminRequest.createCollection(COLL_ROUTE, "conf", 2, 1)
+            .setRouterField(ROUTE_FIELD)
+            .process(cluster.getSolrClient()).getStatus());
+
+    List<SchemaRequest.Update> updateList = new ArrayList<>();
+    updateList.add(new SchemaRequest.AddField(makeMap("name", ROUTE_FIELD, "type", "string", "indexed", "true", "stored", "true")));
+    updateList.add(new SchemaRequest.AddField(makeMap("name", "sorter", "type", "int", "indexed", "true", "stored", "true")));
+    SchemaRequest.MultiUpdate multiUpdateRequest = new SchemaRequest.MultiUpdate(updateList);
+    UpdateResponse multipleUpdatesResponse = multiUpdateRequest.process(cluster.getSolrClient(), COLL_ROUTE);
+    assertNull("Error adding fields", multipleUpdatesResponse.getResponse().get("errors"));
+
+    assertEquals("Failed to  create collection routeIdTest"
+        , 0
+        , CollectionAdminRequest.createCollection(COLL_ID, "conf", 2, 1)
+            .process(cluster.getSolrClient()).getStatus());
+
+    // We now have two collections, add the same docs to each with the proper
+    // fields so the id field is used in one collection and ROUTE_FIELD in the other..
+    List<SolrInputDocument> docsRoute = new ArrayList<>();
+    List<SolrInputDocument> docsId = new ArrayList<>();
+    int lim = random().nextInt(50) + 50;
+    for (int idx = 0; idx < lim; ++idx) {
+      SolrInputDocument doc = new SolrInputDocument();
+      // id should be irrelevant for routing, but we want to insure that
+      // if somehow we _do_ use id to route, we don't use the same ID
+      // as the docs we're adding to the collection routed by id.
+      doc.addField("id", idx + 1_500_000);
+      doc.addField(ROUTE_FIELD, idx);
+      doc.addField("sorter", idx);
+      docsRoute.add(doc);
+
+      doc = new SolrInputDocument();
+      doc.addField("id", idx);
+      doc.addField("sorter", idx);
+      docsId.add(doc);
+    }
+    cluster.getSolrClient().add(COLL_ROUTE, docsRoute);
+    cluster.getSolrClient().add(COLL_ID, docsId);
+
+    cluster.getSolrClient().commit(COLL_ROUTE, true, true);
+    cluster.getSolrClient().commit(COLL_ID, true, true);
+
+    checkShardsHaveSameDocs();
+  }
+
+  private void checkShardsHaveSameDocs() throws IOException, SolrServerException {
+
+    CloudSolrClient client = cluster.getSolrClient();
+
+    DocCollection docColl = client.getZkStateReader().getClusterState().getCollection(COLL_ROUTE);
+    List<Replica> reps = new ArrayList<>(docColl.getSlice("shard1").getReplicas());
+    String urlRouteShard1 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+    reps = new ArrayList<>(docColl.getSlice("shard2").getReplicas());
+    String urlRouteShard2 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+    docColl = client.getZkStateReader().getClusterState().getCollection(COLL_ID);
+    reps = new ArrayList<>(docColl.getSlice("shard1").getReplicas());
+    String urlIdShard1 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+    reps = new ArrayList<>(docColl.getSlice("shard2").getReplicas());
+    String urlIdShard2 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+    assertNotEquals("URLs shouldn't be the same", urlRouteShard1, urlIdShard1);
+    assertNotEquals("URLs shouldn't be the same", urlRouteShard2, urlIdShard2);
+    compareShardDocs(urlIdShard1, urlRouteShard1);
+    compareShardDocs(urlIdShard2, urlRouteShard2);
+  }
+
+  private void compareShardDocs(String urlId, String urlRoute) throws IOException, SolrServerException {
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    QueryRequest request = new QueryRequest(params);
+    params.add("distrib", "false");
+    params.add(CommonParams.Q, "*:*");
+    params.add(CommonParams.SORT, "sorter asc");
+    params.add(CommonParams.ROWS, "1000");
+
+    HttpSolrClient httpSC = new HttpSolrClient.Builder(urlId).build();
+    SolrDocumentList docsId = (SolrDocumentList) httpSC.request(request).get("response");
+    httpSC.close();
+
+    httpSC = new HttpSolrClient.Builder(urlRoute).build();
+    SolrDocumentList docsRoute = (SolrDocumentList) httpSC.request(request).get("response");
+    httpSC.close();
+
+    assertEquals("We should have the exact same number of docs on each shard", docsId.getNumFound(), docsRoute.getNumFound());
+    for (int idx = 0; idx < docsId.getNumFound(); ++idx) {
+      int idId = Integer.parseInt((String) docsId.get(idx).getFieldValue("id"));
+      int idRoute = Integer.parseInt((String) docsRoute.get(idx).getFieldValue("id"));
+      assertEquals("Docs with Ids 1.5M different should be on exactly the same shard and in the same order when sorted",
+          idId, idRoute - 1_500_000);
+    }
+  }
+}
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
index 6a38f7c..3895c86 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
@@ -107,10 +107,15 @@ public abstract class BaseCloudSolrClient extends SolrClient {
   private ExecutorService threadPool = ExecutorUtil
       .newMDCAwareCachedThreadPool(new SolrNamedThreadFactory(
           "CloudSolrClient ThreadPool"));
-  private String idField = ID;
+
+  // We can figure this out from the collection,
+  // there's no need to let this get out of sync.
+  @Deprecated(since = "8.7")
+  private String routeFieldDeprecated = null;
   public static final String STATE_VERSION = "_stateVer_";
   private long retryExpiryTime = TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS);//3 seconds or 3 million nanos
   private final Set<String> NON_ROUTABLE_PARAMS;
+
   {
     NON_ROUTABLE_PARAMS = new HashSet<>();
     NON_ROUTABLE_PARAMS.add(UpdateParams.EXPUNGE_DELETES);
@@ -289,17 +294,25 @@ public abstract class BaseCloudSolrClient extends SolrClient {
   }
 
   /**
-   * @param idField the field to route documents on.
+   * @param routeField the field to route documents on.
+   *                   <p>
+   *                   deprecated, the field is automatically determined from Zookeeper
    */
-  public void setIdField(String idField) {
-    this.idField = idField;
+  @Deprecated(since = "8.7")
+  public void setIdField(String routeField) {
+    log.warn("setIdField is deprecated, route field inferred from cluster state");
+    this.routeFieldDeprecated = routeField;
   }
 
   /**
    * @return the field that updates are routed on.
+   *
+   * deprecated, the field is automatically determined from Zookeeper
    */
+  @Deprecated (since = "8.7")
   public String getIdField() {
-    return idField;
+    log.warn("getIdField is deprecated, route field is in cluster state");
+    return routeFieldDeprecated;
   }
 
   /** Sets the default collection for request */
@@ -508,10 +521,12 @@ public abstract class BaseCloudSolrClient extends SolrClient {
     //Create the URL map, which is keyed on slice name.
     //The value is a list of URLs for each replica in the slice.
     //The first value in the list is the leader for the slice.
-    final Map<String,List<String>> urlMap = buildUrlMap(col, replicaListTransformer);
-    final Map<String, ? extends LBSolrClient.Req> routes = createRoutes(updateRequest, routableParams, col, router, urlMap, idField);
+    final Map<String, List<String>> urlMap = buildUrlMap(col, replicaListTransformer);
+    String routeField = (routeFieldDeprecated != null) ? routeFieldDeprecated :
+        (col.getRouter().getRouteField(col) == null) ? ID : col.getRouter().getRouteField(col);
+    final Map<String, ? extends LBSolrClient.Req> routes = createRoutes(updateRequest, routableParams, col, router, urlMap, routeField);
     if (routes == null) {
-      if (directUpdatesToLeadersOnly && hasInfoToFindLeaders(updateRequest, idField)) {
+      if (directUpdatesToLeadersOnly && hasInfoToFindLeaders(updateRequest, routeField)) {
         // we have info (documents with ids and/or ids to delete) with
         // which to find the leaders but we could not find (all of) them
         throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
@@ -626,9 +641,9 @@ public abstract class BaseCloudSolrClient extends SolrClient {
   }
 
   protected Map<String, ? extends LBSolrClient.Req> createRoutes(UpdateRequest updateRequest, ModifiableSolrParams routableParams,
-                                                       DocCollection col, DocRouter router, Map<String, List<String>> urlMap,
-                                                       String idField) {
-    return urlMap == null ? null : updateRequest.getRoutesToCollection(router, col, urlMap, routableParams, idField);
+                                                                 DocCollection col, DocRouter router, Map<String, List<String>> urlMap,
+                                                                 String routeField) {
+    return urlMap == null ? null : updateRequest.getRoutesToCollection(router, col, urlMap, routableParams, routeField);
   }
 
   private Map<String,List<String>> buildUrlMap(DocCollection col, ReplicaListTransformer replicaListTransformer) {