You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by GitBox <gi...@apache.org> on 2022/09/04 13:14:23 UTC

[GitHub] [pulsar] asafm commented on a diff in pull request #16758: [improve][txn] PIP-160 Metrics stats of Transaction buffered writer

asafm commented on code in PR #16758:
URL: https://github.com/apache/pulsar/pull/16758#discussion_r962312507


##########
pulsar-transaction/coordinator/src/main/java/org/apache/pulsar/transaction/coordinator/impl/TxnLogBufferedWriterMetricsStats.java:
##########
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pulsar.transaction.coordinator.impl;
+
+import io.prometheus.client.CollectorRegistry;
+import io.prometheus.client.Counter;
+import io.prometheus.client.Histogram;
+import java.io.Closeable;
+import lombok.Getter;
+
+/***
+ * Describes the working status of the {@link TxnLogBufferedWriter}, helps users tune the thresholds of
+ * {@link TxnLogBufferedWriter} for best performance.
+ * Note-1: When the batch feature is turned off, nothing is recorded here. In this scenario, users can refer to
+ *   {@link org.apache.bookkeeper.mledger.ManagedLedgerMXBean} instead.
+ * Note-2: When the batch feature is enabled, a batch flush can be caused by several triggers. The metrics in this
+ *   class count each type of trigger to allow you to diagnose what is mostly causing batch flushes. The metrics also
+ *   include histograms for the delay of a batch since its 1st record entered, the size of a batch in bytes, and the
+ *   number of records in a batch. This will help you to tune the parameters that control some of the batch flush
+ *   triggers: maxDelay, maxRecords, maxSize.
+ *   Note that the 4th trigger - a single record larger than batch size - triggers a flush of the current batch, but
+ *   the big record itself is not written in batch hence is not included in the batch metrics written above (batch
+ *   size, batch delay, etc). The trigger is of course counted as other trigger types.
+ */
+public class TxnLogBufferedWriterMetricsStats implements Closeable {
+
+    static final double[] RECORD_COUNT_PER_ENTRY_BUCKETS = {10, 50, 100, 200, 500, 1_000};
+
+    static final double[] BYTES_SIZE_PER_ENTRY_BUCKETS = {128, 512, 1_024, 2_048, 4_096, 16_384,
+            102_400, 1_232_896};
+
+    static final double[] MAX_DELAY_TIME_BUCKETS = {1, 5, 10};
+
+    @Getter
+    private final String metricsPrefix;
+
+    private final String[] labelNames;
+
+    private final String[] labelValues;
+
+    /** Count of records in per transaction log batch. **/
+    private final Histogram recordsPerBatchMetric;
+    private final Histogram.Child recordsPerBatchHistogram;
+
+    /** Bytes size per transaction log batch. **/
+    private final Histogram batchSizeBytesMetric;
+    private final Histogram.Child batchSizeBytesHistogram;
+
+    /** The time of the oldest transaction log spent in the buffer before being sent. **/
+    private final Histogram oldestRecordInBatchDelayTimeSecondsMetric;
+    private final Histogram.Child oldestRecordInBatchDelayTimeSecondsHistogram;
+
+    /** The count of the triggering transaction log batch flush actions by "batchedWriteMaxRecords". **/
+    private final Counter batchFlushTriggeredByMaxRecordsMetric;
+    private final Counter.Child batchFlushTriggeredByMaxRecordsCounter;
+
+    /** The count of the triggering transaction log batch flush actions by "batchedWriteMaxSize". **/
+    private final Counter batchFlushTriggeredByMaxSizeMetric;
+    private final Counter.Child batchFlushTriggeredByMaxSizeCounter;
+
+    /** The count of the triggering transaction log batch flush actions by "batchedWriteMaxDelayInMillis". **/
+    private final Counter batchFlushTriggeredByMaxDelayMetric;
+    private final Counter.Child batchFlushTriggeredByMaxDelayCounter;
+
+    /**
+     * If {@link TxnLogBufferedWriter#asyncAddData(Object, TxnLogBufferedWriter.AddDataCallback, Object)} accepts a
+     * request whose data is too large (larger than "batchedWriteMaxSize"), then two flushes are executed:
+     *    1. Write the data cached in the queue to BK.
+     *    2. Write the large data directly to BK.
+     * This preserves the ordering of multiple writes to BK.
+     */
+    private final Counter batchFlushTriggeredByLargeSingleDataMetric;
+    private final Counter.Child batchFlushTriggeredByLargeSingleDataCounter;
+
+    /**
+     * Users need to ensure that a {@link TxnLogBufferedWriterMetricsStats} with the same {@code metricsPrefix} is
+     * created only once; otherwise an IllegalArgumentException will be thrown.
+     */
+    public TxnLogBufferedWriterMetricsStats(String metricsPrefix, String[] labelNames, String[] labelValues,

Review Comment:
   @tjiuming This is what I consulted with you about a couple of weeks ago, if you remember, and based on that I wrote: https://github.com/apache/pulsar/pull/16758#issuecomment-1207782630



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pulsar.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org