You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2020/07/26 23:22:44 UTC
[lucene-solr] branch master updated: SOLR-11868:
CloudSolrClient.setIdField is confusing,
it's really the routing field. Should be deprecated.
This is an automated email from the ASF dual-hosted git repository.
erick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 6bf5f4a SOLR-11868: CloudSolrClient.setIdField is confusing, it's really the routing field. Should be deprecated.
6bf5f4a is described below
commit 6bf5f4a87f40cd9afd3b9104423d0fe51f287259
Author: Erick Erickson <Er...@gmail.com>
AuthorDate: Sun Jul 26 18:18:05 2020 -0400
SOLR-11868: CloudSolrClient.setIdField is confusing, it's really the routing field. Should be deprecated.
---
solr/CHANGES.txt | 2 +
.../test/org/apache/solr/cloud/RouteFieldTest.java | 163 +++++++++++++++++++++
.../client/solrj/impl/BaseCloudSolrClient.java | 37 +++--
3 files changed, 191 insertions(+), 11 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 18b815a..30e76e8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -162,6 +162,8 @@ Other Changes
* SOLR-14676: Update commons-collections to 4.4 and use it in Solr (Erick Erickson)
+* SOLR-11868: Deprecate CloudSolrClient.setIdField, use information from Zookeeper (Erick Erickson)
+
================== 8.6.0 ==================
Consult the lucene/CHANGES.txt file for additional, low level, changes in this release.
diff --git a/solr/core/src/test/org/apache/solr/cloud/RouteFieldTest.java b/solr/core/src/test/org/apache/solr/cloud/RouteFieldTest.java
new file mode 100644
index 0000000..4a30af1
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/RouteFieldTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud;
+
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.schema.SchemaRequest;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.solr.client.solrj.response.schema.SchemaResponse.*;
+import static org.apache.solr.common.util.Utils.makeMap;
+
+public class RouteFieldTest extends SolrCloudTestCase {
+
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ private static final String ROUTE_FIELD = "route_field";
+ private static final String COLL_ROUTE = "routeFieldTest";
+ private static final String COLL_ID = "routeIdTest";
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ System.setProperty("managed.schema.mutable", "true");
+ configureCluster(1)
+ .addConfig("conf", configset("cloud-managed"))
+ .configure();
+ }
+
+ // Test for seeing if we actually respect the route field
+ // We should be able to create two collections, one using ID and one using
+ // a route field. We should then be able to index docs to each, using
+ // the route field and id field and find the same docs on the same shards.
+ @Test
+ public void routeFieldTest() throws Exception {
+ log.info("Starting routeFieldTest");
+
+ assertEquals("Failed to create collection routeFieldTest",
+ 0,
+ CollectionAdminRequest.createCollection(COLL_ROUTE, "conf", 2, 1)
+ .setRouterField(ROUTE_FIELD)
+ .process(cluster.getSolrClient()).getStatus());
+
+ List<SchemaRequest.Update> updateList = new ArrayList<>();
+ updateList.add(new SchemaRequest.AddField(makeMap("name", ROUTE_FIELD, "type", "string", "indexed", "true", "stored", "true")));
+ updateList.add(new SchemaRequest.AddField(makeMap("name", "sorter", "type", "int", "indexed", "true", "stored", "true")));
+ SchemaRequest.MultiUpdate multiUpdateRequest = new SchemaRequest.MultiUpdate(updateList);
+ UpdateResponse multipleUpdatesResponse = multiUpdateRequest.process(cluster.getSolrClient(), COLL_ROUTE);
+ assertNull("Error adding fields", multipleUpdatesResponse.getResponse().get("errors"));
+
+ assertEquals("Failed to create collection routeIdTest"
+ , 0
+ , CollectionAdminRequest.createCollection(COLL_ID, "conf", 2, 1)
+ .process(cluster.getSolrClient()).getStatus());
+
+ // We now have two collections, add the same docs to each with the proper
+ // fields so the id field is used in one collection and ROUTE_FIELD in the other..
+ List<SolrInputDocument> docsRoute = new ArrayList<>();
+ List<SolrInputDocument> docsId = new ArrayList<>();
+ int lim = random().nextInt(50) + 50;
+ for (int idx = 0; idx < lim; ++idx) {
+ SolrInputDocument doc = new SolrInputDocument();
+ // id should be irrelevant for routing, but we want to insure that
+ // if somehow we _do_ use id to route, we don't use the same ID
+ // as the docs we're adding to the collection routed by id.
+ doc.addField("id", idx + 1_500_000);
+ doc.addField(ROUTE_FIELD, idx);
+ doc.addField("sorter", idx);
+ docsRoute.add(doc);
+
+ doc = new SolrInputDocument();
+ doc.addField("id", idx);
+ doc.addField("sorter", idx);
+ docsId.add(doc);
+ }
+ cluster.getSolrClient().add(COLL_ROUTE, docsRoute);
+ cluster.getSolrClient().add(COLL_ID, docsId);
+
+ cluster.getSolrClient().commit(COLL_ROUTE, true, true);
+ cluster.getSolrClient().commit(COLL_ID, true, true);
+
+ checkShardsHaveSameDocs();
+ }
+
+ private void checkShardsHaveSameDocs() throws IOException, SolrServerException {
+
+ CloudSolrClient client = cluster.getSolrClient();
+
+ DocCollection docColl = client.getZkStateReader().getClusterState().getCollection(COLL_ROUTE);
+ List<Replica> reps = new ArrayList<>(docColl.getSlice("shard1").getReplicas());
+ String urlRouteShard1 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+ reps = new ArrayList<>(docColl.getSlice("shard2").getReplicas());
+ String urlRouteShard2 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+ docColl = client.getZkStateReader().getClusterState().getCollection(COLL_ID);
+ reps = new ArrayList<>(docColl.getSlice("shard1").getReplicas());
+ String urlIdShard1 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+ reps = new ArrayList<>(docColl.getSlice("shard2").getReplicas());
+ String urlIdShard2 = reps.get(0).get("base_url") + "/" + reps.get(0).get("core");
+
+ assertNotEquals("URLs shouldn't be the same", urlRouteShard1, urlIdShard1);
+ assertNotEquals("URLs shouldn't be the same", urlRouteShard2, urlIdShard2);
+ compareShardDocs(urlIdShard1, urlRouteShard1);
+ compareShardDocs(urlIdShard2, urlRouteShard2);
+ }
+
+ private void compareShardDocs(String urlId, String urlRoute) throws IOException, SolrServerException {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ QueryRequest request = new QueryRequest(params);
+ params.add("distrib", "false");
+ params.add(CommonParams.Q, "*:*");
+ params.add(CommonParams.SORT, "sorter asc");
+ params.add(CommonParams.ROWS, "1000");
+
+ HttpSolrClient httpSC = new HttpSolrClient.Builder(urlId).build();
+ SolrDocumentList docsId = (SolrDocumentList) httpSC.request(request).get("response");
+ httpSC.close();
+
+ httpSC = new HttpSolrClient.Builder(urlRoute).build();
+ SolrDocumentList docsRoute = (SolrDocumentList) httpSC.request(request).get("response");
+ httpSC.close();
+
+ assertEquals("We should have the exact same number of docs on each shard", docsId.getNumFound(), docsRoute.getNumFound());
+ for (int idx = 0; idx < docsId.getNumFound(); ++idx) {
+ int idId = Integer.parseInt((String) docsId.get(idx).getFieldValue("id"));
+ int idRoute = Integer.parseInt((String) docsRoute.get(idx).getFieldValue("id"));
+ assertEquals("Docs with Ids 1.5M different should be on exactly the same shard and in the same order when sorted",
+ idId, idRoute - 1_500_000);
+ }
+ }
+}
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
index 6a38f7c..3895c86 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
@@ -107,10 +107,15 @@ public abstract class BaseCloudSolrClient extends SolrClient {
private ExecutorService threadPool = ExecutorUtil
.newMDCAwareCachedThreadPool(new SolrNamedThreadFactory(
"CloudSolrClient ThreadPool"));
- private String idField = ID;
+
+ // We can figure this out from the collection,
+ // there's no need to let this get out of sync.
+ @Deprecated(since = "8.7")
+ private String routeFieldDeprecated = null;
public static final String STATE_VERSION = "_stateVer_";
private long retryExpiryTime = TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS);//3 seconds or 3 million nanos
private final Set<String> NON_ROUTABLE_PARAMS;
+
{
NON_ROUTABLE_PARAMS = new HashSet<>();
NON_ROUTABLE_PARAMS.add(UpdateParams.EXPUNGE_DELETES);
@@ -289,17 +294,25 @@ public abstract class BaseCloudSolrClient extends SolrClient {
}
/**
- * @param idField the field to route documents on.
+ * @param routeField the field to route documents on.
+ * <p>
+ * deprecated, the field is automatically determined from Zookeeper
*/
- public void setIdField(String idField) {
- this.idField = idField;
+ @Deprecated(since = "8.7")
+ public void setIdField(String routeField) {
+ log.warn("setIdField is deprecated, route field inferred from cluster state");
+ this.routeFieldDeprecated = routeField;
}
/**
* @return the field that updates are routed on.
+ *
+ * deprecated, the field is automatically determined from Zookeeper
*/
+ @Deprecated (since = "8.7")
public String getIdField() {
- return idField;
+ log.warn("getIdField is deprecated, route field is in cluster state");
+ return routeFieldDeprecated;
}
/** Sets the default collection for request */
@@ -508,10 +521,12 @@ public abstract class BaseCloudSolrClient extends SolrClient {
//Create the URL map, which is keyed on slice name.
//The value is a list of URLs for each replica in the slice.
//The first value in the list is the leader for the slice.
- final Map<String,List<String>> urlMap = buildUrlMap(col, replicaListTransformer);
- final Map<String, ? extends LBSolrClient.Req> routes = createRoutes(updateRequest, routableParams, col, router, urlMap, idField);
+ final Map<String, List<String>> urlMap = buildUrlMap(col, replicaListTransformer);
+ String routeField = (routeFieldDeprecated != null) ? routeFieldDeprecated :
+ (col.getRouter().getRouteField(col) == null) ? ID : col.getRouter().getRouteField(col);
+ final Map<String, ? extends LBSolrClient.Req> routes = createRoutes(updateRequest, routableParams, col, router, urlMap, routeField);
if (routes == null) {
- if (directUpdatesToLeadersOnly && hasInfoToFindLeaders(updateRequest, idField)) {
+ if (directUpdatesToLeadersOnly && hasInfoToFindLeaders(updateRequest, routeField)) {
// we have info (documents with ids and/or ids to delete) with
// which to find the leaders but we could not find (all of) them
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
@@ -626,9 +641,9 @@ public abstract class BaseCloudSolrClient extends SolrClient {
}
protected Map<String, ? extends LBSolrClient.Req> createRoutes(UpdateRequest updateRequest, ModifiableSolrParams routableParams,
- DocCollection col, DocRouter router, Map<String, List<String>> urlMap,
- String idField) {
- return urlMap == null ? null : updateRequest.getRoutesToCollection(router, col, urlMap, routableParams, idField);
+ DocCollection col, DocRouter router, Map<String, List<String>> urlMap,
+ String routeField) {
+ return urlMap == null ? null : updateRequest.getRoutesToCollection(router, col, urlMap, routableParams, routeField);
}
private Map<String,List<String>> buildUrlMap(DocCollection col, ReplicaListTransformer replicaListTransformer) {