You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/09/08 08:46:46 UTC

lucene-solr:jira/solr-12709: SOLR-12709: Collect more detailed statistics about shards and replicas in collections.

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-12709 051891036 -> 4a9176b9e


SOLR-12709: Collect more detailed statistics about shards and replicas in collections.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4a9176b9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4a9176b9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4a9176b9

Branch: refs/heads/jira/solr-12709
Commit: 4a9176b9ecf15584b976190b647899ad615db51e
Parents: 0518910
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Sat Sep 8 10:46:12 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Sat Sep 8 10:46:12 2018 +0200

----------------------------------------------------------------------
 .../cloud/autoscaling/IndexSizeTrigger.java     |   9 +-
 .../cloud/autoscaling/sim/SimCloudManager.java  |   8 +-
 .../sim/SimClusterStateProvider.java            | 100 +++++++++++++++++--
 .../autoscaling/sim/TestSimAutoScaling.java     |  17 ++--
 .../client/solrj/cloud/autoscaling/Policy.java  |   2 +-
 5 files changed, 116 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
index 967582c..6129cc7 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
@@ -26,6 +26,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
@@ -432,13 +433,13 @@ public class IndexSizeTrigger extends TriggerBase {
       super(TriggerEventType.INDEXSIZE, source, eventTime, null);
       properties.put(TriggerEvent.REQUESTED_OPS, ops);
       // avoid passing very large amounts of data here - just use replica names
-      Set<String> above = new HashSet<>();
+      TreeMap<String, String> above = new TreeMap<>();
       aboveSize.forEach((coll, replicas) ->
-          replicas.forEach(r -> above.add(r.getCore())));
+          replicas.forEach(r -> above.put(r.getCore(), "docs=" + r.getVariable(DOCS_SIZE_PROP) + ", bytes=" + r.getVariable(BYTES_SIZE_PROP))));
       properties.put(ABOVE_SIZE_PROP, above);
-      Set<String> below = new HashSet<>();
+      TreeMap<String, String> below = new TreeMap<>();
       belowSize.forEach((coll, replicas) ->
-          replicas.forEach(r -> below.add(r.getCore())));
+          replicas.forEach(r -> below.put(r.getCore(), "docs=" + r.getVariable(DOCS_SIZE_PROP) + ", bytes=" + r.getVariable(BYTES_SIZE_PROP))));
       properties.put(BELOW_SIZE_PROP, below);
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index 365c488..7db79d0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -360,7 +360,13 @@ public class SimCloudManager implements SolrCloudManager {
     Set<String> deadNodes = getSimNodeStateProvider().simGetDeadNodes();
     sb.append("## Dead nodes:\t\t" + deadNodes.size() + "\n");
     deadNodes.forEach(n -> sb.append("##\t\t" + n + "\n"));
-    sb.append("## Collections:\t" + getSimClusterStateProvider().simListCollections() + "\n");
+    sb.append("## Collections:\n");
+      clusterStateProvider.simGetCollectionStats().forEach((coll, stats) -> {
+        sb.append("##  * ").append(coll).append('\n');
+        stats.forEach((k, v) -> {
+          sb.append("##    " + k + "\t" + v + "\n");
+        });
+      });
     if (withCollections) {
       ClusterState state = clusterStateProvider.getClusterState();
       state.forEachCollection(coll -> sb.append(coll.toString() + "\n"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index 3f31031..77bcfd7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -26,12 +26,14 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Random;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
@@ -42,6 +44,7 @@ import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.ReentrantLock;
 
 import com.google.common.util.concurrent.AtomicDouble;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
@@ -1216,13 +1219,10 @@ public class SimClusterStateProvider implements ClusterStateProvider {
         // apply buffered updates
         long perShard = bufferedUpdates.get() / subSlices.size();
         long remainder = bufferedUpdates.get() % subSlices.size();
-        String subSlice = null;
-        for (String sub : subSlices) {
+        for (int i = 0; i < subSlices.size(); i++) {
+          String sub = subSlices.get(i);
           long numUpdates = perShard;
-          if (subSlice == null) {
-            subSlice = sub;
-          }
-          if (subSlice.equals(sub)) {
+          if (i == 0) {
             numUpdates += remainder;
           }
           simSetShardValue(collectionName, sub, "SEARCHER.searcher.numDocs", numUpdates, true, false);
@@ -1437,6 +1437,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
       it = req.getDocIterator();
       if (it != null) {
         while (it.hasNext()) {
+          it.next();
           docCount++;
         }
       }
@@ -1836,6 +1837,93 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     return new ArrayList<>(colShardReplicaMap.keySet());
   }
 
+  /**
+   * Collect summary statistics about the shards and replicas of every collection in the cluster state.
+   * Doc / byte counts are sampled from one replica per shard: the leader of an active shard when it has one, otherwise the first replica.
+   * @return collection name mapped to an insertion-ordered map of statistic name to value
+   * @throws IOException declared on the signature; presumably propagated from getClusterState() - TODO confirm
+   */
+  public Map<String, Map<String, Object>> simGetCollectionStats() throws IOException {
+    Map<String, Map<String, Object>> stats = new TreeMap<>();
+    ClusterState state = getClusterState();
+    state.forEachCollection(coll -> {
+      Map<String, Object> perColl = new LinkedHashMap<>();
+      stats.put(coll.getName(), perColl);
+      perColl.put("shardsTotal", coll.getSlices().size());
+      Map<String, AtomicInteger> shardState = new TreeMap<>();
+      int noLeader = 0;
+      SummaryStatistics docs = new SummaryStatistics();
+      SummaryStatistics bytes = new SummaryStatistics();
+      SummaryStatistics inactiveDocs = new SummaryStatistics();
+      SummaryStatistics inactiveBytes = new SummaryStatistics();
+      long deletedDocs = 0;
+      int totalReplicas = 0;
+      int activeReplicas = 0;
+      for (Slice s : coll.getSlices()) {
+        shardState.computeIfAbsent(s.getState().toString(), st -> new AtomicInteger()).incrementAndGet();
+        totalReplicas += s.getReplicas().size();
+        if (s.getState() != Slice.State.ACTIVE) {
+          if (!s.getReplicas().isEmpty()) {
+            ReplicaInfo ri = getReplicaInfo(s.getReplicas().iterator().next());
+            if (ri != null) {
+              addStatIfPresent(inactiveDocs, (Number)ri.getVariable("SEARCHER.searcher.numDocs"));
+              addStatIfPresent(inactiveBytes, (Number)ri.getVariable("INDEX.sizeInBytes"));
+            }
+          }
+          continue;
+        }
+        activeReplicas += s.getReplicas().size();
+        Replica leader = s.getLeader();
+        if (leader == null) {
+          noLeader++;
+          // no leader: fall back to the first replica so the shard still contributes to the summaries
+          if (!s.getReplicas().isEmpty()) {
+            leader = s.getReplicas().iterator().next();
+          }
+        }
+        ReplicaInfo ri = leader != null ? getReplicaInfo(leader) : null;
+        if (leader != null && ri == null) {
+          log.warn("Unknown ReplicaInfo for {}", leader);
+        }
+        if (ri != null) {
+          addStatIfPresent(docs, (Number)ri.getVariable("SEARCHER.searcher.numDocs"));
+          addStatIfPresent(bytes, (Number)ri.getVariable("INDEX.sizeInBytes"));
+          // NOTE(review): the key is spelled "deleteDocs" (not "deletedDocs") - confirm it matches the metric that is actually set
+          Number delDocs = (Number)ri.getVariable("SEARCHER.searcher.deleteDocs");
+          if (delDocs != null) {
+            deletedDocs += delDocs.longValue();
+          }
+        }
+      }
+      perColl.put("shardsState", shardState);
+      perColl.put("  shardsWithoutLeader", noLeader);
+      perColl.put("totalReplicas", totalReplicas);
+      perColl.put("  activeReplicas", activeReplicas);
+      perColl.put("  inactiveReplicas", totalReplicas - activeReplicas);
+      putSliceSummary(perColl, "Active", "Docs", docs);
+      putSliceSummary(perColl, "Inactive", "Docs", inactiveDocs);
+      putSliceSummary(perColl, "Active", "Bytes", bytes);
+      putSliceSummary(perColl, "Inactive", "Bytes", inactiveBytes);
+      perColl.put("totalActiveDeletedDocs", String.format(Locale.ROOT, "%,d", deletedDocs));
+    });
+    return stats;
+  }
+
+  /** Add a sample to the summary only when the metric is present; a missing (null) metric is skipped instead of throwing. */
+  private static void addStatIfPresent(SummaryStatistics summary, Number value) {
+    if (value != null) {
+      summary.addValue(value.doubleValue());
+    }
+  }
+
+  /** Put the total / max / min / avg entries of one summary, formatted with Locale.ROOT so the output is platform-independent. */
+  private static void putSliceSummary(Map<String, Object> perColl, String state, String unit, SummaryStatistics summary) {
+    perColl.put("total" + state + unit, String.format(Locale.ROOT, "%,d", (long)summary.getSum()));
+    perColl.put("  max" + state + "Slice" + unit, String.format(Locale.ROOT, "%,d", (long)summary.getMax()));
+    perColl.put("  min" + state + "Slice" + unit, String.format(Locale.ROOT, "%,d", (long)summary.getMin()));
+    perColl.put("  avg" + state + "Slice" + unit, String.format(Locale.ROOT, "%,.0f", summary.getMean()));
+  }
+
   // interface methods
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
index 369ebc8..918d7ae 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
@@ -26,17 +26,17 @@ import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAut
  *
  */
 @TimeoutSuite(millis = 48 * 3600 * 1000)
-@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=INFO;org.apache.solr.cloud.autoscaling.ScheduledTriggers=INFO")
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=DEBUG;org.apache.solr.cloud.autoscaling.ScheduledTriggers=INFO")
 //@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.CloudTestUtils=TRACE")
 public class TestSimAutoScaling extends SimSolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static final int SPEED = 50;
-  private static final int NUM_NODES = 50;
+  private static final int SPEED = 500;
+  private static final int NUM_NODES = 200;
 
-  private static final long BATCH_SIZE = 8000000;
-  private static final long NUM_BATCHES = 100000;
-  private static final long ABOVE_SIZE = 2000000;
+  private static final long BATCH_SIZE = 20000;
+  private static final long NUM_BATCHES = 10000000;
+  private static final long ABOVE_SIZE = 10000000;
 
 
   private static TimeSource timeSource;
@@ -59,7 +59,8 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
     CloudTestUtils.waitForState(cluster, "failed to create " + collectionName, collectionName,
         CloudTestUtils.clusterShape(2, 2, false, true));
 
-    long waitForSeconds = 3 + random().nextInt(5);
+    //long waitForSeconds = 3 + random().nextInt(5);
+    long waitForSeconds = 1;
     String setTriggerCommand = "{" +
         "'set-trigger' : {" +
         "'name' : 'scaleUpTrigger'," +
@@ -76,8 +77,8 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
 
     long batchSize = BATCH_SIZE;
     for (long i = 0; i < NUM_BATCHES; i++) {
-      log.info(String.format("#### Total docs so far: %,d", (i * batchSize)));
       addDocs(collectionName, i * batchSize, batchSize);
+      log.info(String.format("#### Total docs so far: %,d", ((i + 1) * batchSize)));
       timeSource.sleep(waitForSeconds);
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
index 711b4c3..b68a5a9 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
@@ -294,7 +294,7 @@ public class Policy implements MapWriter {
               lastComparison[0].node,
               lastComparison[1].node,
               matrix.size());
-          throw e;
+          throw new RuntimeException(e.getMessage());
         }
         p.setApproxVal(tmpMatrix);
       }