You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by si...@apache.org on 2018/12/13 14:50:48 UTC
[bookkeeper] branch master updated: [STATS] [DOC] Add @StatsDoc annotation for bookkeeper autorecovery stats
This is an automated email from the ASF dual-hosted git repository.
sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new c0138f3 [STATS] [DOC] Add @StatsDoc annotation for bookkeeper autorecovery stats
c0138f3 is described below
commit c0138f3758333877739f256303722dd892663979
Author: Sijie Guo <gu...@gmail.com>
AuthorDate: Thu Dec 13 22:50:44 2018 +0800
[STATS] [DOC] Add @StatsDoc annotation for bookkeeper autorecovery stats
Descriptions of the changes in this PR:
*Motivation*
As part of [BP-36](https://github.com/apache/bookkeeper/issues/1785), this PR documents the bookkeeper autorecovery stats.
*Changes*
- Convert the bookkeeper autorecovery stats to use the @StatsDoc annotation for documenting metrics
Master Issue: #1785
Reviewers: Jia Zhai <None>
This closes #1879 from sijie/replication_stats
---
.../org/apache/bookkeeper/replication/Auditor.java | 64 +++++++++++++++++++++-
.../bookkeeper/replication/AuditorElector.java | 10 ++++
.../bookkeeper/replication/ReplicationWorker.java | 26 ++++++++-
3 files changed, 96 insertions(+), 4 deletions(-)
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
index acf0c09..89883b0 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
@@ -20,6 +20,18 @@
*/
package org.apache.bookkeeper.replication;
+import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE;
+import static org.apache.bookkeeper.replication.ReplicationStats.AUDIT_BOOKIES_TIME;
+import static org.apache.bookkeeper.replication.ReplicationStats.BOOKIE_TO_LEDGERS_MAP_CREATION_TIME;
+import static org.apache.bookkeeper.replication.ReplicationStats.CHECK_ALL_LEDGERS_TIME;
+import static org.apache.bookkeeper.replication.ReplicationStats.NUM_BOOKIES_PER_LEDGER;
+import static org.apache.bookkeeper.replication.ReplicationStats.NUM_BOOKIE_AUDITS_DELAYED;
+import static org.apache.bookkeeper.replication.ReplicationStats.NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED;
+import static org.apache.bookkeeper.replication.ReplicationStats.NUM_FRAGMENTS_PER_LEDGER;
+import static org.apache.bookkeeper.replication.ReplicationStats.NUM_LEDGERS_CHECKED;
+import static org.apache.bookkeeper.replication.ReplicationStats.NUM_UNDER_REPLICATED_LEDGERS;
+import static org.apache.bookkeeper.replication.ReplicationStats.URL_PUBLISH_TIME_FOR_LOST_BOOKIE;
+
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
@@ -61,6 +73,7 @@ import org.apache.bookkeeper.replication.ReplicationException.UnavailableExcepti
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
+import org.apache.bookkeeper.stats.annotations.StatsDoc;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.KeeperException;
@@ -76,6 +89,10 @@ import org.slf4j.LoggerFactory;
*
* <p>TODO: eliminate the direct usage of zookeeper here {@link https://github.com/apache/bookkeeper/issues/1332}
*/
+@StatsDoc(
+ name = AUDITOR_SCOPE,
+ help = "Auditor related stats"
+)
public class Auditor implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(Auditor.class);
private final ServerConfiguration conf;
@@ -88,20 +105,61 @@ public class Auditor implements AutoCloseable {
private final ScheduledExecutorService executor;
private List<String> knownBookies = new ArrayList<String>();
private final String bookieIdentifier;
+ private volatile Future<?> auditTask;
+ private Set<String> bookiesToBeAudited = Sets.newHashSet();
+ private volatile int lostBookieRecoveryDelayBeforeChange;
+
private final StatsLogger statsLogger;
+ @StatsDoc(
+ name = NUM_UNDER_REPLICATED_LEDGERS,
+ help = "the distribution of num under_replicated ledgers on each auditor run"
+ )
private final OpStatsLogger numUnderReplicatedLedger;
+ @StatsDoc(
+ name = URL_PUBLISH_TIME_FOR_LOST_BOOKIE,
+ help = "the latency distribution of publishing under replicated ledgers for lost bookies"
+ )
private final OpStatsLogger uRLPublishTimeForLostBookies;
+ @StatsDoc(
+ name = BOOKIE_TO_LEDGERS_MAP_CREATION_TIME,
+ help = "the latency distribution of creating bookies-to-ledgers map"
+ )
private final OpStatsLogger bookieToLedgersMapCreationTime;
+ @StatsDoc(
+ name = CHECK_ALL_LEDGERS_TIME,
+ help = "the latency distribution of checking all ledgers"
+ )
private final OpStatsLogger checkAllLedgersTime;
+ @StatsDoc(
+ name = AUDIT_BOOKIES_TIME,
+ help = "the latency distribution of auditing all the bookies"
+ )
private final OpStatsLogger auditBookiesTime;
+ @StatsDoc(
+ name = NUM_LEDGERS_CHECKED,
+ help = "the number of ledgers checked by the auditor"
+ )
private final Counter numLedgersChecked;
+ @StatsDoc(
+ name = NUM_FRAGMENTS_PER_LEDGER,
+ help = "the distribution of number of fragments per ledger"
+ )
private final OpStatsLogger numFragmentsPerLedger;
+ @StatsDoc(
+ name = NUM_BOOKIES_PER_LEDGER,
+ help = "the distribution of number of bookies per ledger"
+ )
private final OpStatsLogger numBookiesPerLedger;
+ @StatsDoc(
+ name = NUM_BOOKIE_AUDITS_DELAYED,
+ help = "the number of bookie-audits delayed"
+ )
private final Counter numBookieAuditsDelayed;
+ @StatsDoc(
+ name = NUM_DELAYED_BOOKIE_AUDITS_DELAYES_CANCELLED,
+ help = "the number of delayed-bookie-audits cancelled"
+ )
private final Counter numDelayedBookieAuditsCancelled;
- private volatile Future<?> auditTask;
- private Set<String> bookiesToBeAudited = Sets.newHashSet();
- private volatile int lostBookieRecoveryDelayBeforeChange;
static BookKeeper createBookKeeperClient(ServerConfiguration conf)
throws InterruptedException, IOException {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
index 14c5c53..543aaac 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
@@ -21,6 +21,7 @@
package org.apache.bookkeeper.replication;
import static com.google.common.base.Charsets.UTF_8;
+import static org.apache.bookkeeper.replication.ReplicationStats.AUDITOR_SCOPE;
import static org.apache.bookkeeper.replication.ReplicationStats.ELECTION_ATTEMPTS;
import com.google.common.annotations.VisibleForTesting;
@@ -48,6 +49,7 @@ import org.apache.bookkeeper.replication.ReplicationException.UnavailableExcepti
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.NullStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
+import org.apache.bookkeeper.stats.annotations.StatsDoc;
import org.apache.bookkeeper.util.BookKeeperConstants;
import org.apache.bookkeeper.util.ZkUtils;
import org.apache.commons.lang.StringUtils;
@@ -71,6 +73,10 @@ import org.slf4j.LoggerFactory;
* will be elected as Auditor. All the other bookies will be watching on their
* predecessor znode according to the ephemeral sequence numbers.
*/
+@StatsDoc(
+ name = AUDITOR_SCOPE,
+ help = "Auditor related stats"
+)
public class AuditorElector {
private static final Logger LOG = LoggerFactory
.getLogger(AuditorElector.class);
@@ -98,6 +104,10 @@ public class AuditorElector {
private AtomicBoolean running = new AtomicBoolean(false);
// Expose Stats
+ @StatsDoc(
+ name = ELECTION_ATTEMPTS,
+ help = "The number of auditor election attempts"
+ )
private final Counter electionAttempts;
private final StatsLogger statsLogger;
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
index eeaa96b..74d2081 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
@@ -22,6 +22,7 @@ package org.apache.bookkeeper.replication;
import static org.apache.bookkeeper.replication.ReplicationStats.NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER;
import static org.apache.bookkeeper.replication.ReplicationStats.NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED;
import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATE_EXCEPTION;
+import static org.apache.bookkeeper.replication.ReplicationStats.REPLICATION_WORKER_SCOPE;
import static org.apache.bookkeeper.replication.ReplicationStats.REREPLICATE_OP;
import com.google.common.base.Stopwatch;
@@ -64,6 +65,7 @@ import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.NullStatsLogger;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
+import org.apache.bookkeeper.stats.annotations.StatsDoc;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -72,6 +74,10 @@ import org.slf4j.LoggerFactory;
* ReplicationWorker will take the fragments one by one from
* ZKLedgerUnderreplicationManager and replicates to it.
*/
+@StatsDoc(
+ name = REPLICATION_WORKER_SCOPE,
+ help = "replication worker related stats"
+)
public class ReplicationWorker implements Runnable {
private static final Logger LOG = LoggerFactory
.getLogger(ReplicationWorker.class);
@@ -93,8 +99,25 @@ public class ReplicationWorker implements Runnable {
// Expose Stats
private final StatsLogger statsLogger;
+ @StatsDoc(
+ name = REPLICATE_EXCEPTION,
+ help = "replication related exceptions"
+ )
+ private final StatsLogger exceptionLogger;
+ @StatsDoc(
+ name = REREPLICATE_OP,
+ help = "operation stats of re-replicating ledgers"
+ )
private final OpStatsLogger rereplicateOpStats;
+ @StatsDoc(
+ name = NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED,
+ help = "the number of ledgers re-replicated"
+ )
private final Counter numLedgersReplicated;
+ @StatsDoc(
+ name = NUM_DEFER_LEDGER_LOCK_RELEASE_OF_FAILED_LEDGER,
+ help = "the number of defer-ledger-lock-releases of failed ledgers"
+ )
private final Counter numDeferLedgerLockReleaseOfFailedLedger;
private final Map<String, Counter> exceptionCounters;
final LoadingCache<Long, AtomicInteger> replicationFailedLedgers;
@@ -164,6 +187,7 @@ public class ReplicationWorker implements Runnable {
// Expose Stats
this.statsLogger = statsLogger;
+ this.exceptionLogger = statsLogger.scope(REPLICATE_EXCEPTION);
this.rereplicateOpStats = this.statsLogger.getOpStatsLogger(REREPLICATE_OP);
this.numLedgersReplicated = this.statsLogger.getCounter(NUM_FULL_OR_PARTIAL_LEDGERS_REPLICATED);
this.numDeferLedgerLockReleaseOfFailedLedger = this.statsLogger
@@ -561,7 +585,7 @@ public class ReplicationWorker implements Runnable {
private Counter getExceptionCounter(String name) {
Counter counter = this.exceptionCounters.get(name);
if (counter == null) {
- counter = this.statsLogger.scope(REPLICATE_EXCEPTION).getCounter(name);
+ counter = this.exceptionLogger.getCounter(name);
this.exceptionCounters.put(name, counter);
}
return counter;