You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2019/10/15 09:13:34 UTC

[lucene-solr] branch master updated: SOLR-13793: Limiting number of forwards to total replicas in collection to avoid deadly forwarding loops

This is an automated email from the ASF dual-hosted git repository.

ishan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new b8648c6  SOLR-13793: Limiting number of forwards to total replicas in collection to avoid deadly forwarding loops
b8648c6 is described below

commit b8648c60e781a684a6c017fb09cac87595c2a8fa
Author: Ishan Chattopadhyaya <is...@apache.org>
AuthorDate: Tue Oct 15 14:43:20 2019 +0530

    SOLR-13793: Limiting number of forwards to total replicas in collection to avoid deadly forwarding loops
---
 solr/CHANGES.txt                                   |   4 +
 .../java/org/apache/solr/servlet/HttpSolrCall.java |  26 +++-
 .../solr/cloud/TestQueryingOnDownCollection.java   | 159 +++++++++++++++++++++
 3 files changed, 186 insertions(+), 3 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 705d03e..802ad32 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -322,6 +322,10 @@ Bug Fixes
 
 * SOLR-13665: Added missing netty dependencies to solrJ and upgraded netty to v 4.1.29.Final (Jörn Franke, janhoy)
 
+* SOLR-13793: HttpSolrCall now maintains internal request count (_forwardedCount) for remote queries and limits them to
+  the number of replicas. This avoids making too many cascading calls to remote servers, which, if not restricted, can
+  bring down nodes containing the said collection (Kesharee Nandan Vishwakarma, Ishan Chattopadhyaya)
+
 Other Changes
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
index 042c392..5e56b73 100644
--- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
+++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
@@ -139,6 +139,8 @@ public class HttpSolrCall {
 
   public static final String ORIGINAL_USER_PRINCIPAL_HEADER = "originalUserPrincipal";
 
+  public static final String INTERNAL_REQUEST_COUNT = "_forwardedCount";
+
   public static final Random random;
   static {
     // We try to make things reproducible in the context of our tests by initializing the random instance
@@ -446,7 +448,7 @@ public class HttpSolrCall {
     }
   }
 
-  protected void extractRemotePath(String collectionName, String origCorename) throws UnsupportedEncodingException, KeeperException, InterruptedException {
+  protected void extractRemotePath(String collectionName, String origCorename) throws UnsupportedEncodingException, KeeperException, InterruptedException, SolrException {
     assert core == null;
     coreUrl = getRemoteCoreUrl(collectionName, origCorename);
     // don't proxy for internal update requests
@@ -644,12 +646,19 @@ public class HttpSolrCall {
     }
   }
 
+  private String getQuerySting() {
+    int internalRequestCount = queryParams.getInt(INTERNAL_REQUEST_COUNT, 0);
+    ModifiableSolrParams updatedQueryParams = new ModifiableSolrParams(queryParams);
+    updatedQueryParams.set(INTERNAL_REQUEST_COUNT, internalRequestCount + 1);
+    return updatedQueryParams.toQueryString();
+  }
+
   //TODO using Http2Client
   private void remoteQuery(String coreUrl, HttpServletResponse resp) throws IOException {
     HttpRequestBase method;
     HttpEntity httpEntity = null;
     try {
-      String urlstr = coreUrl + queryParams.toQueryString();
+      String urlstr = coreUrl + getQuerySting();
 
       boolean isPostOrPutRequest = "POST".equals(req.getMethod()) || "PUT".equals(req.getMethod());
       if ("GET".equals(req.getMethod())) {
@@ -963,13 +972,15 @@ public class HttpSolrCall {
     }
   }
 
-  protected String getRemoteCoreUrl(String collectionName, String origCorename) {
+  protected String getRemoteCoreUrl(String collectionName, String origCorename) throws SolrException {
     ClusterState clusterState = cores.getZkController().getClusterState();
     final DocCollection docCollection = clusterState.getCollectionOrNull(collectionName);
     Slice[] slices = (docCollection != null) ? docCollection.getActiveSlicesArr() : null;
     List<Slice> activeSlices = new ArrayList<>();
     boolean byCoreName = false;
 
+    int totalReplicas = 0;
+
     if (slices == null) {
       byCoreName = true;
       activeSlices = new ArrayList<>();
@@ -983,6 +994,9 @@ public class HttpSolrCall {
       }
     }
 
+    for (Slice s: activeSlices) {
+      totalReplicas += s.getReplicas().size();
+    }
     if (activeSlices.isEmpty()) {
       return null;
     }
@@ -996,7 +1010,13 @@ public class HttpSolrCall {
     String coreUrl = getCoreUrl(collectionName, origCorename, clusterState,
         activeSlices, byCoreName, true);
 
+    // Avoid getting into a recursive loop of requests being forwarded by
+    // stopping forwarding and erroring out after (totalReplicas) forwards
     if (coreUrl == null) {
+      if (queryParams.getInt(INTERNAL_REQUEST_COUNT, 0) > totalReplicas){
+        throw new SolrException(SolrException.ErrorCode.INVALID_STATE,
+            "No active replicas found for collection: " + collectionName);
+      }
       coreUrl = getCoreUrl(collectionName, origCorename, clusterState,
           activeSlices, byCoreName, false);
     }
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java b/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
new file mode 100644
index 0000000..763cdd4
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/TestQueryingOnDownCollection.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.impl.Http2SolrClient;
+import org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.util.Utils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestQueryingOnDownCollection extends SolrCloudTestCase {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final String COLLECTION_NAME = "infected";
+
+  private static final String USERNAME = "solr";
+  private static final String PASSWORD = "solr";
+
+  static {
+    System.setProperty("basicauth", String.format(Locale.ROOT,"{}:{}", USERNAME, PASSWORD));
+  }
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(3)
+        .addConfig("conf", configset("cloud-minimal"))
+        .withSecurityJson(STD_CONF)
+        .configure();
+  }
+
+  @Test
+  /**
+   * Assert that requests to "down collection", i.e. a collection which has all replicas in down state
+   * (but are hosted on nodes that are live), fail fast and throw meaningful exceptions
+   */
+  public void testQueryToDownCollectionShouldFailFast() throws Exception {
+
+    CollectionAdminRequest.createCollection(COLLECTION_NAME, "conf", 2, 1)
+        .setBasicAuthCredentials(USERNAME, PASSWORD)
+        .process(cluster.getSolrClient());
+
+    // Add some dummy documents
+    UpdateRequest update = (UpdateRequest) new UpdateRequest().setBasicAuthCredentials(USERNAME, PASSWORD);
+    for (int i = 0; i < 100; i++) {
+      update.add("id", Integer.toString(i));
+    }
+    update.commit(cluster.getSolrClient(), COLLECTION_NAME);
+
+    // Bring down replicas but keep nodes up. This could've been done by some combinations of collections API operations;
+    // however, to make it faster, altering cluster state directly! ;-)
+    downAllReplicas();
+
+    // assert all replicas are in down state
+    List<Replica> replicas = getCollectionState(COLLECTION_NAME).getReplicas();
+    for (Replica replica: replicas){
+      assertEquals(replica.getState(), Replica.State.DOWN);
+    }
+
+    // assert all nodes as active
+    assertEquals(3, cluster.getSolrClient().getClusterStateProvider().getLiveNodes().size());
+
+    SolrClient client = cluster.getJettySolrRunner(0).newClient();
+
+    SolrRequest req = new QueryRequest(new SolrQuery("*:*").setRows(0)).setBasicAuthCredentials(USERNAME, PASSWORD);
+
+    // Without the SOLR-13793 fix, this causes requests to "down collection" to pile up (until the nodes run out 
+    // of serviceable threads and they crash, even for other collections hosted on the nodes).
+    SolrException error = expectThrows(SolrException.class,
+        "Request should fail after trying all replica nodes once",
+        () -> client.request(req, COLLECTION_NAME)
+    );
+
+    client.close();
+
+    assertEquals(error.code(), SolrException.ErrorCode.INVALID_STATE.code);
+    assertTrue(error.getMessage().contains("No active replicas found for collection: " + COLLECTION_NAME));
+
+    // run same set of tests on v2 client which uses V2HttpCall
+    Http2SolrClient v2Client = new Http2SolrClient.Builder(cluster.getJettySolrRunner(0).getBaseUrl().toString())
+        .build();
+    PreemptiveBasicAuthClientBuilderFactory factory = new PreemptiveBasicAuthClientBuilderFactory();
+    factory.setup(v2Client);
+
+    error = expectThrows(SolrException.class,
+        "Request should fail after trying all replica nodes once",
+        () -> v2Client.request(req, COLLECTION_NAME)
+    );
+
+    v2Client.close();
+
+    assertEquals(error.code(), SolrException.ErrorCode.INVALID_STATE.code);
+    assertTrue(error.getMessage().contains("No active replicas found for collection: " + COLLECTION_NAME));
+  }
+
+  private void downAllReplicas() throws Exception {
+    byte[] collectionState = cluster.getZkClient().getData("/collections/" + COLLECTION_NAME + "/state.json",
+        null, null, true);
+
+    Map<String,Map<String,?>> infectedState = (Map<String,Map<String,?>>) Utils.fromJSON(collectionState);
+    Map<String, Object> shards = (Map<String, Object>) infectedState.get(COLLECTION_NAME).get("shards");
+    for(Map.Entry<String, Object> shard: shards.entrySet()) {
+      Map<String, Object> replicas = (Map<String, Object>) ((Map<String, Object>) shard.getValue() ).get("replicas");
+      for (Map.Entry<String, Object> replica : replicas.entrySet()) {
+        ((Map<String, Object>) replica.getValue()).put("state", Replica.State.DOWN.toString());
+      }
+    }
+
+    cluster.getZkClient().setData("/collections/" + COLLECTION_NAME + "/state.json", Utils.toJSON(infectedState)
+        , true);
+  }
+
+  protected static final String STD_CONF = "{\n" +
+      "  \"authentication\":{\n" +
+      "   \"blockUnknown\": true,\n" +
+      "   \"class\":\"solr.BasicAuthPlugin\",\n" +
+      "   \"credentials\":{\"solr\":\"EEKn7ywYk5jY8vG9TyqlG2jvYuvh1Q7kCCor6Hqm320= 6zkmjMjkMKyJX6/f0VarEWQujju5BzxZXub6WOrEKCw=\"}\n" +
+      "  },\n" +
+      "  \"authorization\":{\n" +
+      "   \"class\":\"solr.RuleBasedAuthorizationPlugin\",\n" +
+      "   \"permissions\":[\n" +
+      " {\"name\":\"security-edit\", \"role\":\"admin\"},\n" +
+      " {\"name\":\"collection-admin-edit\", \"role\":\"admin\"},\n" +
+      " {\"name\":\"core-admin-edit\", \"role\":\"admin\"}\n" +
+      "   ],\n" +
+      "   \"user-role\":{\"solr\":\"admin\"}\n" +
+      "  }\n" +
+      "}";
+
+
+}