You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/09/08 08:46:46 UTC
lucene-solr:jira/solr-12709: SOLR-12709: Collect more detailed statistics about shards and replicas in collections.
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-12709 051891036 -> 4a9176b9e
SOLR-12709: Collect more detailed statistics about shards and replicas in collections.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4a9176b9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4a9176b9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4a9176b9
Branch: refs/heads/jira/solr-12709
Commit: 4a9176b9ecf15584b976190b647899ad615db51e
Parents: 0518910
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Sat Sep 8 10:46:12 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Sat Sep 8 10:46:12 2018 +0200
----------------------------------------------------------------------
.../cloud/autoscaling/IndexSizeTrigger.java | 9 +-
.../cloud/autoscaling/sim/SimCloudManager.java | 8 +-
.../sim/SimClusterStateProvider.java | 100 +++++++++++++++++--
.../autoscaling/sim/TestSimAutoScaling.java | 17 ++--
.../client/solrj/cloud/autoscaling/Policy.java | 2 +-
5 files changed, 116 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
index 967582c..6129cc7 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
@@ -26,6 +26,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
@@ -432,13 +433,13 @@ public class IndexSizeTrigger extends TriggerBase {
super(TriggerEventType.INDEXSIZE, source, eventTime, null);
properties.put(TriggerEvent.REQUESTED_OPS, ops);
// avoid passing very large amounts of data here - just use replica names
- Set<String> above = new HashSet<>();
+ TreeMap<String, String> above = new TreeMap<>();
aboveSize.forEach((coll, replicas) ->
- replicas.forEach(r -> above.add(r.getCore())));
+ replicas.forEach(r -> above.put(r.getCore(), "docs=" + r.getVariable(DOCS_SIZE_PROP) + ", bytes=" + r.getVariable(BYTES_SIZE_PROP))));
properties.put(ABOVE_SIZE_PROP, above);
- Set<String> below = new HashSet<>();
+ TreeMap<String, String> below = new TreeMap<>();
belowSize.forEach((coll, replicas) ->
- replicas.forEach(r -> below.add(r.getCore())));
+ replicas.forEach(r -> below.put(r.getCore(), "docs=" + r.getVariable(DOCS_SIZE_PROP) + ", bytes=" + r.getVariable(BYTES_SIZE_PROP))));
properties.put(BELOW_SIZE_PROP, below);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index 365c488..7db79d0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -360,7 +360,13 @@ public class SimCloudManager implements SolrCloudManager {
Set<String> deadNodes = getSimNodeStateProvider().simGetDeadNodes();
sb.append("## Dead nodes:\t\t" + deadNodes.size() + "\n");
deadNodes.forEach(n -> sb.append("##\t\t" + n + "\n"));
- sb.append("## Collections:\t" + getSimClusterStateProvider().simListCollections() + "\n");
+ sb.append("## Collections:\n");
+ clusterStateProvider.simGetCollectionStats().forEach((coll, stats) -> {
+ sb.append("## * ").append(coll).append('\n');
+ stats.forEach((k, v) -> {
+ sb.append("## " + k + "\t" + v + "\n");
+ });
+ });
if (withCollections) {
ClusterState state = clusterStateProvider.getClusterState();
state.forEachCollection(coll -> sb.append(coll.toString() + "\n"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index 3f31031..77bcfd7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -26,12 +26,14 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
+import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
@@ -42,6 +44,7 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import com.google.common.util.concurrent.AtomicDouble;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
@@ -1216,13 +1219,10 @@ public class SimClusterStateProvider implements ClusterStateProvider {
// apply buffered updates
long perShard = bufferedUpdates.get() / subSlices.size();
long remainder = bufferedUpdates.get() % subSlices.size();
- String subSlice = null;
- for (String sub : subSlices) {
+ for (int i = 0; i < subSlices.size(); i++) {
+ String sub = subSlices.get(i);
long numUpdates = perShard;
- if (subSlice == null) {
- subSlice = sub;
- }
- if (subSlice.equals(sub)) {
+ if (i == 0) {
numUpdates += remainder;
}
simSetShardValue(collectionName, sub, "SEARCHER.searcher.numDocs", numUpdates, true, false);
@@ -1437,6 +1437,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
it = req.getDocIterator();
if (it != null) {
while (it.hasNext()) {
+ it.next();
docCount++;
}
}
@@ -1836,6 +1837,93 @@ public class SimClusterStateProvider implements ClusterStateProvider {
return new ArrayList<>(colShardReplicaMap.keySet());
}
+ public Map<String, Map<String, Object>> simGetCollectionStats() throws IOException {
+ Map<String, Map<String, Object>> stats = new TreeMap<>();
+ ClusterState state = getClusterState();
+ state.forEachCollection(coll -> {
+ Map<String, Object> perColl = new LinkedHashMap<>();
+ stats.put(coll.getName(), perColl);
+ perColl.put("shardsTotal", coll.getSlices().size());
+ Map<String, AtomicInteger> shardState = new TreeMap<>();
+ int noLeader = 0;
+
+ SummaryStatistics docs = new SummaryStatistics();
+ SummaryStatistics bytes = new SummaryStatistics();
+ SummaryStatistics inactiveDocs = new SummaryStatistics();
+ SummaryStatistics inactiveBytes = new SummaryStatistics();
+
+ long deletedDocs = 0;
+ int totalReplicas = 0;
+ int activeReplicas = 0;
+
+ for (Slice s : coll.getSlices()) {
+ shardState.computeIfAbsent(s.getState().toString(), st -> new AtomicInteger())
+ .incrementAndGet();
+ totalReplicas += s.getReplicas().size();
+ if (s.getState() != Slice.State.ACTIVE) {
+ if (!s.getReplicas().isEmpty()) {
+ ReplicaInfo ri = getReplicaInfo(s.getReplicas().iterator().next());
+ if (ri != null) {
+ Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
+ Number numBytes = (Number)ri.getVariable("INDEX.sizeInBytes");
+ inactiveDocs.addValue(numDocs.doubleValue());
+ inactiveBytes.addValue(numBytes.doubleValue());
+ }
+ }
+ continue;
+ }
+ activeReplicas += s.getReplicas().size();
+ Replica leader = s.getLeader();
+ if (leader == null) {
+ noLeader++;
+ if (!s.getReplicas().isEmpty()) {
+ leader = s.getReplicas().iterator().next();
+ }
+ }
+ ReplicaInfo ri = null;
+ if (leader != null) {
+ ri = getReplicaInfo(leader);
+ if (ri == null) {
+ log.warn("Unknown ReplicaInfo for {}", leader);
+ }
+ }
+ if (ri != null) {
+ Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
+ Number delDocs = (Number)ri.getVariable("SEARCHER.searcher.deleteDocs");
+ Number numBytes = (Number)ri.getVariable("INDEX.sizeInBytes");
+ docs.addValue(numDocs.doubleValue());
+ if (delDocs != null) {
+ deletedDocs += delDocs.longValue();
+ }
+ bytes.addValue(numBytes.doubleValue());
+ }
+ }
+ perColl.put("shardsState", shardState);
+ perColl.put(" shardsWithoutLeader", noLeader);
+ perColl.put("totalReplicas", totalReplicas);
+ perColl.put(" activeReplicas", activeReplicas);
+ perColl.put(" inactiveReplicas", totalReplicas - activeReplicas);
+ perColl.put("totalActiveDocs", String.format("%,d", (long)docs.getSum()));
+ perColl.put(" maxActiveSliceDocs", String.format("%,d", (long)docs.getMax()));
+ perColl.put(" minActiveSliceDocs", String.format("%,d", (long)docs.getMin()));
+ perColl.put(" avgActiveSliceDocs", String.format("%,.0f", docs.getMean()));
+ perColl.put("totalInactiveDocs", String.format("%,d", (long)inactiveDocs.getSum()));
+ perColl.put(" maxInactiveSliceDocs", String.format("%,d", (long)inactiveDocs.getMax()));
+ perColl.put(" minInactiveSliceDocs", String.format("%,d", (long)inactiveDocs.getMin()));
+ perColl.put(" avgInactiveSliceDocs", String.format("%,.0f", inactiveDocs.getMean()));
+ perColl.put("totalActiveBytes", String.format("%,d", (long)bytes.getSum()));
+ perColl.put(" maxActiveSliceBytes", String.format("%,d", (long)bytes.getMax()));
+ perColl.put(" minActiveSliceBytes", String.format("%,d", (long)bytes.getMin()));
+ perColl.put(" avgActiveSliceBytes", String.format("%,.0f", bytes.getMean()));
+ perColl.put("totalInactiveBytes", String.format("%,d", (long)inactiveBytes.getSum()));
+ perColl.put(" maxInactiveSliceBytes", String.format("%,d", (long)inactiveBytes.getMax()));
+ perColl.put(" minInactiveSliceBytes", String.format("%,d", (long)inactiveBytes.getMin()));
+ perColl.put(" avgInactiveSliceBytes", String.format("%,.0f", inactiveBytes.getMean()));
+ perColl.put("totalActiveDeletedDocs", String.format("%,d", deletedDocs));
+ });
+ return stats;
+ }
+
// interface methods
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
index 369ebc8..918d7ae 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
@@ -26,17 +26,17 @@ import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAut
*
*/
@TimeoutSuite(millis = 48 * 3600 * 1000)
-@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=INFO;org.apache.solr.cloud.autoscaling.ScheduledTriggers=INFO")
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=DEBUG;org.apache.solr.cloud.autoscaling.ScheduledTriggers=INFO")
//@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.CloudTestUtils=TRACE")
public class TestSimAutoScaling extends SimSolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final int SPEED = 50;
- private static final int NUM_NODES = 50;
+ private static final int SPEED = 500;
+ private static final int NUM_NODES = 200;
- private static final long BATCH_SIZE = 8000000;
- private static final long NUM_BATCHES = 100000;
- private static final long ABOVE_SIZE = 2000000;
+ private static final long BATCH_SIZE = 20000;
+ private static final long NUM_BATCHES = 10000000;
+ private static final long ABOVE_SIZE = 10000000;
private static TimeSource timeSource;
@@ -59,7 +59,8 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
CloudTestUtils.waitForState(cluster, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(2, 2, false, true));
- long waitForSeconds = 3 + random().nextInt(5);
+ //long waitForSeconds = 3 + random().nextInt(5);
+ long waitForSeconds = 1;
String setTriggerCommand = "{" +
"'set-trigger' : {" +
"'name' : 'scaleUpTrigger'," +
@@ -76,8 +77,8 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
long batchSize = BATCH_SIZE;
for (long i = 0; i < NUM_BATCHES; i++) {
- log.info(String.format("#### Total docs so far: %,d", (i * batchSize)));
addDocs(collectionName, i * batchSize, batchSize);
+ log.info(String.format("#### Total docs so far: %,d", ((i + 1) * batchSize)));
timeSource.sleep(waitForSeconds);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a9176b9/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
index 711b4c3..b68a5a9 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
@@ -294,7 +294,7 @@ public class Policy implements MapWriter {
lastComparison[0].node,
lastComparison[1].node,
matrix.size());
- throw e;
+ throw new RuntimeException(e.getMessage());
}
p.setApproxVal(tmpMatrix);
}