You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/01/09 12:31:48 UTC
lucene-solr:jira/solr-9856: SOLR-9856 Fix timer in PeerSync,
add skipped sync counter. Add unit tests.
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-9856 482ce1e56 -> ca2e2befa
SOLR-9856 Fix timer in PeerSync, add skipped sync counter. Add unit tests.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ca2e2bef
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ca2e2bef
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ca2e2bef
Branch: refs/heads/jira/solr-9856
Commit: ca2e2befa4e1e40960e1b4ed961423b54c5cd6a4
Parents: 482ce1e
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Jan 9 13:31:14 2017 +0100
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Jan 9 13:31:14 2017 +0100
----------------------------------------------------------------------
.../java/org/apache/solr/update/PeerSync.java | 22 +++++++++++++++--
.../solr/cloud/PeerSyncReplicationTest.java | 15 +++++++++++
.../apache/solr/cloud/TestCloudRecovery.java | 26 ++++++++++++++++++++
3 files changed, 61 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca2e2bef/solr/core/src/java/org/apache/solr/update/PeerSync.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/PeerSync.java b/solr/core/src/java/org/apache/solr/update/PeerSync.java
index 428af0f..861cbf7 100644
--- a/solr/core/src/java/org/apache/solr/update/PeerSync.java
+++ b/solr/core/src/java/org/apache/solr/update/PeerSync.java
@@ -95,6 +95,7 @@ public class PeerSync implements SolrMetricProducer {
// metrics
private Timer syncTime;
private Counter syncErrors;
+ private Counter syncSkipped;
// comparator that sorts by absolute value, putting highest first
public static Comparator<Long> absComparator = (o1, o2) -> {
@@ -163,6 +164,7 @@ public class PeerSync implements SolrMetricProducer {
public void initializeMetrics(SolrMetricManager manager, String registry, String scope) {
syncTime = manager.timer(registry, "time", scope);
syncErrors = manager.counter(registry, "errors", scope);
+ syncSkipped = manager.counter(registry, "skipped", scope);
}
/** optional list of updates we had before possibly receiving new updates */
@@ -224,9 +226,11 @@ public class PeerSync implements SolrMetricProducer {
*/
public PeerSyncResult sync() {
if (ulog == null) {
+ syncErrors.inc();
return PeerSyncResult.failure();
}
MDCLoggingContext.setCore(core);
+ Timer.Context timerContext = null;
try {
log.info(msg() + "START replicas=" + replicas + " nUpdates=" + nUpdates);
@@ -237,10 +241,13 @@ public class PeerSync implements SolrMetricProducer {
}
// check if we already in sync to begin with
if(doFingerprint && alreadyInSync()) {
+ syncSkipped.inc();
return PeerSyncResult.success();
}
-
-
+
+ // measure only when actual sync is performed
+ timerContext = syncTime.time();
+
// Fire off the requests before getting our own recent updates (for better concurrency)
// This also allows us to avoid getting updates we don't need... if we got our updates and then got their updates,
// they would
@@ -258,6 +265,7 @@ public class PeerSync implements SolrMetricProducer {
if (startingVersions != null) {
if (startingVersions.size() == 0) {
log.warn("no frame of reference to tell if we've missed updates");
+ syncErrors.inc();
return PeerSyncResult.failure();
}
Collections.sort(startingVersions, absComparator);
@@ -273,6 +281,7 @@ public class PeerSync implements SolrMetricProducer {
if (Math.abs(startingVersions.get(0)) < smallestNewUpdate) {
log.warn(msg()
+ "too many updates received since start - startingUpdates no longer overlaps with our currentUpdates");
+ syncErrors.inc();
return PeerSyncResult.failure();
}
@@ -301,10 +310,12 @@ public class PeerSync implements SolrMetricProducer {
if (srsp.getException() == null) {
List<Long> otherVersions = (List<Long>)srsp.getSolrResponse().getResponse().get("versions");
if (otherVersions != null && !otherVersions.isEmpty()) {
+ syncErrors.inc();
return PeerSyncResult.failure(true);
}
}
}
+ syncErrors.inc();
return PeerSyncResult.failure(false);
}
}
@@ -320,6 +331,7 @@ public class PeerSync implements SolrMetricProducer {
if (!success) {
log.info(msg() + "DONE. sync failed");
shardHandler.cancelAll();
+ syncErrors.inc();
return PeerSyncResult.failure();
}
}
@@ -334,8 +346,14 @@ public class PeerSync implements SolrMetricProducer {
}
log.info(msg() + "DONE. sync " + (success ? "succeeded" : "failed"));
+ if (!success) {
+ syncErrors.inc();
+ }
return success ? PeerSyncResult.success() : PeerSyncResult.failure();
} finally {
+ if (timerContext != null) {
+ timerContext.close();
+ }
MDCLoggingContext.clear();
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca2e2bef/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
index 4084ad7..57784b6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
@@ -27,9 +27,14 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Metric;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.Timer;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
@@ -172,6 +177,16 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
// make sure leader has not changed after bringing initial leader back
assertEquals(nodePeerSynced, shardToLeaderJetty.get("shard1"));
+
+ // assert metrics
+ MetricRegistry registry = nodePeerSynced.jetty.getCoreContainer().getMetricManager().registry("solr.core.collection1");
+ Map<String, Metric> metrics = registry.getMetrics();
+ assertTrue("REPLICATION.time present", metrics.containsKey("REPLICATION.time"));
+ assertTrue("REPLICATION.errors present", metrics.containsKey("REPLICATION.errors"));
+ Timer timer = (Timer)metrics.get("REPLICATION.time");
+ assertEquals(1L, timer.getCount());
+ Counter counter = (Counter)metrics.get("REPLICATION.errors");
+ assertEquals(0L, counter.getCount());
success = true;
} finally {
System.clearProperty("solr.disableFingerprint");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca2e2bef/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
index e2f3bfd..164eeab 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
@@ -23,9 +23,14 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Metric;
+import com.codahale.metrics.Timer;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
@@ -35,6 +40,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.cloud.ClusterStateUtil;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.TestInjection;
@@ -102,6 +108,26 @@ public class TestCloudRecovery extends SolrCloudTestCase {
assertEquals(4, resp.getResults().getNumFound());
// Make sure all nodes is recover from tlog
assertEquals(4, countReplayLog.get());
+
+ // check metrics
+ int replicationCount = 0;
+ int errorsCount = 0;
+ int skippedCount = 0;
+ for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
+ SolrMetricManager manager = jetty.getCoreContainer().getMetricManager();
+ List<String> registryNames = manager.registryNames().stream()
+ .filter(s -> s.startsWith("solr.core.")).collect(Collectors.toList());
+ for (String registry : registryNames) {
+ Map<String, Metric> metrics = manager.registry(registry).getMetrics();
+ Timer timer = (Timer)metrics.get("REPLICATION.time");
+ Counter counter = (Counter)metrics.get("REPLICATION.errors");
+ Counter skipped = (Counter)metrics.get("REPLICATION.skipped");
+ replicationCount += timer.getCount();
+ errorsCount += counter.getCount();
+ skippedCount += skipped.getCount();
+ }
+ }
+ assertEquals(2, replicationCount);
}
@Test