You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2019/10/15 09:13:34 UTC
[lucene-solr] branch master updated: SOLR-13793: Limiting number of
forwards to total replicas in collection to avoid deadly forwarding loops
This is an automated email from the ASF dual-hosted git repository.
ishan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new b8648c6 SOLR-13793: Limiting number of forwards to total replicas in collection to avoid deadly forwarding loops
b8648c6 is described below
commit b8648c60e781a684a6c017fb09cac87595c2a8fa
Author: Ishan Chattopadhyaya <is...@apache.org>
AuthorDate: Tue Oct 15 14:43:20 2019 +0530
SOLR-13793: Limiting number of forwards to total replicas in collection to avoid deadly forwarding loops
---
solr/CHANGES.txt | 4 +
.../java/org/apache/solr/servlet/HttpSolrCall.java | 26 +++-
.../solr/cloud/TestQueryingOnDownCollection.java | 159 +++++++++++++++++++++
3 files changed, 186 insertions(+), 3 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 705d03e..802ad32 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -322,6 +322,10 @@ Bug Fixes
* SOLR-13665: Added missing netty dependencies to solrJ and upgraded netty to v 4.1.29.Final (Jörn Franke, janhoy)
+* SOLR-13793: HttpSolrCall now maintains internal request count (_forwardedCount) for remote queries and limits them to
+ the number of replicas. This avoids making too many cascading calls to remote servers, which, if not restricted, can
+ bring down nodes containing the said collection (Kesharee Nandan Vishwakarma, Ishan Chattopadhyaya)
+
Other Changes
----------------------
diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
index 042c392..5e56b73 100644
--- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
+++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
@@ -139,6 +139,8 @@ public class HttpSolrCall {
public static final String ORIGINAL_USER_PRINCIPAL_HEADER = "originalUserPrincipal";
+ public static final String INTERNAL_REQUEST_COUNT = "_forwardedCount";
+
public static final Random random;
static {
// We try to make things reproducible in the context of our tests by initializing the random instance
@@ -446,7 +448,7 @@ public class HttpSolrCall {
}
}
- protected void extractRemotePath(String collectionName, String origCorename) throws UnsupportedEncodingException, KeeperException, InterruptedException {
+ protected void extractRemotePath(String collectionName, String origCorename) throws UnsupportedEncodingException, KeeperException, InterruptedException, SolrException {
assert core == null;
coreUrl = getRemoteCoreUrl(collectionName, origCorename);
// don't proxy for internal update requests
@@ -644,12 +646,19 @@ public class HttpSolrCall {
}
}
+ private String getQueryString() {
+ int internalRequestCount = queryParams.getInt(INTERNAL_REQUEST_COUNT, 0);
+ ModifiableSolrParams updatedQueryParams = new ModifiableSolrParams(queryParams);
+ updatedQueryParams.set(INTERNAL_REQUEST_COUNT, internalRequestCount + 1);
+ return updatedQueryParams.toQueryString();
+ }
+
//TODO using Http2Client
private void remoteQuery(String coreUrl, HttpServletResponse resp) throws IOException {
HttpRequestBase method;
HttpEntity httpEntity = null;
try {
- String urlstr = coreUrl + queryParams.toQueryString();
+ String urlstr = coreUrl + getQueryString();
boolean isPostOrPutRequest = "POST".equals(req.getMethod()) || "PUT".equals(req.getMethod());
if ("GET".equals(req.getMethod())) {
@@ -963,13 +972,15 @@ public class HttpSolrCall {
}
}
- protected String getRemoteCoreUrl(String collectionName, String origCorename) {
+ protected String getRemoteCoreUrl(String collectionName, String origCorename) throws SolrException {
ClusterState clusterState = cores.getZkController().getClusterState();
final DocCollection docCollection = clusterState.getCollectionOrNull(collectionName);
Slice[] slices = (docCollection != null) ? docCollection.getActiveSlicesArr() : null;
List<Slice> activeSlices = new ArrayList<>();
boolean byCoreName = false;
+ int totalReplicas = 0;
+
if (slices == null) {
byCoreName = true;
activeSlices = new ArrayList<>();
@@ -983,6 +994,9 @@ public class HttpSolrCall {
}
}
+ for (Slice s: activeSlices) {
+ totalReplicas += s.getReplicas().size();
+ }
if (activeSlices.isEmpty()) {
return null;
}
@@ -996,7 +1010,13 @@ public class HttpSolrCall {
String coreUrl = getCoreUrl(collectionName, origCorename, clusterState,
activeSlices, byCoreName, true);
+ // Avoid getting into a recursive loop of requests being forwarded by
+ // stopping forwarding and erroring out after (totalReplicas) forwards
if (coreUrl == null) {
+ if (queryParams.getInt(INTERNAL_REQUEST_COUNT, 0) > totalReplicas){
+ throw new SolrException(SolrException.ErrorCode.INVALID_STATE,
+ "No active replicas found for collection: " + collectionName);
+ }
coreUrl = getCoreUrl(collectionName, origCorename, clusterState,
activeSlices, byCoreName, false);
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java b/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
new file mode 100644
index 0000000..763cdd4
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.impl.Http2SolrClient;
+import org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.util.Utils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestQueryingOnDownCollection extends SolrCloudTestCase {
+
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ private static final String COLLECTION_NAME = "infected";
+
+ private static final String USERNAME = "solr";
+ private static final String PASSWORD = "solr";
+
+ static {
+ System.setProperty("basicauth", String.format(Locale.ROOT,"%s:%s", USERNAME, PASSWORD));
+ }
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ configureCluster(3)
+ .addConfig("conf", configset("cloud-minimal"))
+ .withSecurityJson(STD_CONF)
+ .configure();
+ }
+
+ @Test
+ /**
+ * Assert that requests to "down collection", i.e. a collection which has all replicas in down state
+ * (but are hosted on nodes that are live), fail fast and throw meaningful exceptions
+ */
+ public void testQueryToDownCollectionShouldFailFast() throws Exception {
+
+ CollectionAdminRequest.createCollection(COLLECTION_NAME, "conf", 2, 1)
+ .setBasicAuthCredentials(USERNAME, PASSWORD)
+ .process(cluster.getSolrClient());
+
+ // Add some dummy documents
+ UpdateRequest update = (UpdateRequest) new UpdateRequest().setBasicAuthCredentials(USERNAME, PASSWORD);
+ for (int i = 0; i < 100; i++) {
+ update.add("id", Integer.toString(i));
+ }
+ update.commit(cluster.getSolrClient(), COLLECTION_NAME);
+
+ // Bring down replicas but keep nodes up. This could've been done by some combinations of collections API operations;
+ // however, to make it faster, altering cluster state directly! ;-)
+ downAllReplicas();
+
+ // assert all replicas are in down state
+ List<Replica> replicas = getCollectionState(COLLECTION_NAME).getReplicas();
+ for (Replica replica: replicas){
+ assertEquals(replica.getState(), Replica.State.DOWN);
+ }
+
+ // assert all nodes as active
+ assertEquals(3, cluster.getSolrClient().getClusterStateProvider().getLiveNodes().size());
+
+ SolrClient client = cluster.getJettySolrRunner(0).newClient();
+
+ SolrRequest req = new QueryRequest(new SolrQuery("*:*").setRows(0)).setBasicAuthCredentials(USERNAME, PASSWORD);
+
+ // Without the SOLR-13793 fix, this causes requests to "down collection" to pile up (until the nodes run out
+ // of serviceable threads and they crash, even for other collections hosted on the nodes).
+ SolrException error = expectThrows(SolrException.class,
+ "Request should fail after trying all replica nodes once",
+ () -> client.request(req, COLLECTION_NAME)
+ );
+
+ client.close();
+
+ assertEquals(error.code(), SolrException.ErrorCode.INVALID_STATE.code);
+ assertTrue(error.getMessage().contains("No active replicas found for collection: " + COLLECTION_NAME));
+
+ // run same set of tests on v2 client which uses V2HttpCall
+ Http2SolrClient v2Client = new Http2SolrClient.Builder(cluster.getJettySolrRunner(0).getBaseUrl().toString())
+ .build();
+ PreemptiveBasicAuthClientBuilderFactory factory = new PreemptiveBasicAuthClientBuilderFactory();
+ factory.setup(v2Client);
+
+ error = expectThrows(SolrException.class,
+ "Request should fail after trying all replica nodes once",
+ () -> v2Client.request(req, COLLECTION_NAME)
+ );
+
+ v2Client.close();
+
+ assertEquals(error.code(), SolrException.ErrorCode.INVALID_STATE.code);
+ assertTrue(error.getMessage().contains("No active replicas found for collection: " + COLLECTION_NAME));
+ }
+
+ private void downAllReplicas() throws Exception {
+ byte[] collectionState = cluster.getZkClient().getData("/collections/" + COLLECTION_NAME + "/state.json",
+ null, null, true);
+
+ Map<String,Map<String,?>> infectedState = (Map<String,Map<String,?>>) Utils.fromJSON(collectionState);
+ Map<String, Object> shards = (Map<String, Object>) infectedState.get(COLLECTION_NAME).get("shards");
+ for(Map.Entry<String, Object> shard: shards.entrySet()) {
+ Map<String, Object> replicas = (Map<String, Object>) ((Map<String, Object>) shard.getValue() ).get("replicas");
+ for (Map.Entry<String, Object> replica : replicas.entrySet()) {
+ ((Map<String, Object>) replica.getValue()).put("state", Replica.State.DOWN.toString());
+ }
+ }
+
+ cluster.getZkClient().setData("/collections/" + COLLECTION_NAME + "/state.json", Utils.toJSON(infectedState)
+ , true);
+ }
+
+ protected static final String STD_CONF = "{\n" +
+ " \"authentication\":{\n" +
+ " \"blockUnknown\": true,\n" +
+ " \"class\":\"solr.BasicAuthPlugin\",\n" +
+ " \"credentials\":{\"solr\":\"EEKn7ywYk5jY8vG9TyqlG2jvYuvh1Q7kCCor6Hqm320= 6zkmjMjkMKyJX6/f0VarEWQujju5BzxZXub6WOrEKCw=\"}\n" +
+ " },\n" +
+ " \"authorization\":{\n" +
+ " \"class\":\"solr.RuleBasedAuthorizationPlugin\",\n" +
+ " \"permissions\":[\n" +
+ " {\"name\":\"security-edit\", \"role\":\"admin\"},\n" +
+ " {\"name\":\"collection-admin-edit\", \"role\":\"admin\"},\n" +
+ " {\"name\":\"core-admin-edit\", \"role\":\"admin\"}\n" +
+ " ],\n" +
+ " \"user-role\":{\"solr\":\"admin\"}\n" +
+ " }\n" +
+ "}";
+
+
+}