You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by pe...@apache.org on 2021/12/20 11:57:07 UTC

[pulsar] 17/22: Add retry to tolerate the offload index file read failure (#12452)

This is an automated email from the ASF dual-hosted git repository.

penghui pushed a commit to branch branch-2.9
in repository https://gitbox.apache.org/repos/asf/pulsar.git

commit dc99ff57c58d73a9b7ebf36eeb64c9e0f724e6a0
Author: Yong Zhang <zh...@gmail.com>
AuthorDate: Fri Oct 22 11:22:57 2021 +0800

    Add retry to tolerate the offload index file read failure (#12452)
    
    * Add retry to tolerate the offload index file read failure
    ---
    
    *Motivation*
    
    We hit a ReadLedgerMetadata exception when reading the index
    file. The index file is read with a single `read` call, which may
    return before all of the data has arrived from the stream, causing
    the metadata read to fail. We need to keep reading until either all
    of the data has been read or the stream has ended. `readFully` does
    exactly that, throwing an EOFException if the stream ends early,
    so we need to use `readFully` instead of `read`.
    
    Add retry logic to tolerate failures caused by the network.
    Because the stream is read over HTTP, it may break in some
    cases. Add a small retry so that the read does not have to fall
    back to the dispatcher's backoff.
    
    *Modifications*
    
    - Use `readFully` to replace the `read` method
    - Add a small retry for handling the index block build
    
    * Add comments and enrich log
    
    (cherry picked from commit 33bcc17bbe07ffd9683556edecd9ce546b8fd93f)
---
 .../jcloud/impl/BlobStoreBackedReadHandleImpl.java | 32 ++++++++++++++++++----
 .../offload/jcloud/impl/OffloadIndexBlockImpl.java |  6 +---
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/BlobStoreBackedReadHandleImpl.java b/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/BlobStoreBackedReadHandleImpl.java
index 98fdff4..f4dc1b8 100644
--- a/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/BlobStoreBackedReadHandleImpl.java
+++ b/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/BlobStoreBackedReadHandleImpl.java
@@ -224,12 +224,32 @@ public class BlobStoreBackedReadHandleImpl implements ReadHandle {
                                   VersionCheck versionCheck,
                                   long ledgerId, int readBufferSize)
             throws IOException {
-        Blob blob = blobStore.getBlob(bucket, indexKey);
-        versionCheck.check(indexKey, blob);
-        OffloadIndexBlockBuilder indexBuilder = OffloadIndexBlockBuilder.create();
-        OffloadIndexBlock index;
-        try (InputStream payLoadStream = blob.getPayload().openStream()) {
-            index = (OffloadIndexBlock) indexBuilder.fromStream(payLoadStream);
+        int retryCount = 3;
+        OffloadIndexBlock index = null;
+        IOException lastException = null;
+        // The following retry is used to avoid to some network issue cause read index file failure.
+        // If it can not recovery in the retry, we will throw the exception and the dispatcher will schedule to
+        // next read.
+        // If we use a backoff to control the retry, it will introduce a concurrent operation.
+        // We don't want to make it complicated, because in the most of case it shouldn't in the retry loop.
+        while (retryCount-- > 0) {
+            Blob blob = blobStore.getBlob(bucket, indexKey);
+            versionCheck.check(indexKey, blob);
+            OffloadIndexBlockBuilder indexBuilder = OffloadIndexBlockBuilder.create();
+            try (InputStream payLoadStream = blob.getPayload().openStream()) {
+                index = (OffloadIndexBlock) indexBuilder.fromStream(payLoadStream);
+            } catch (IOException e) {
+                // retry to avoid the network issue caused read failure
+                log.warn("Failed to get index block from the offoaded index file {}, still have {} times to retry",
+                    indexKey, retryCount, e);
+                lastException = e;
+                continue;
+            }
+            lastException = null;
+            break;
+        }
+        if (lastException != null) {
+            throw lastException;
         }
 
         BackedInputStream inputStream = new BlobStoreBackedInputStreamImpl(blobStore, bucket, key,
diff --git a/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/OffloadIndexBlockImpl.java b/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/OffloadIndexBlockImpl.java
index 2f64089..a3fa14e 100644
--- a/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/OffloadIndexBlockImpl.java
+++ b/tiered-storage/jcloud/src/main/java/org/apache/bookkeeper/mledger/offload/jcloud/impl/OffloadIndexBlockImpl.java
@@ -338,11 +338,7 @@ public class OffloadIndexBlockImpl implements OffloadIndexBlock {
         int segmentMetadataLength = dis.readInt();
 
         byte[] metadataBytes = new byte[segmentMetadataLength];
-
-        if (segmentMetadataLength != dis.read(metadataBytes)) {
-            log.error("Read ledgerMetadata from bytes failed");
-            throw new IOException("Read ledgerMetadata from bytes failed");
-        }
+        dis.readFully(metadataBytes);
         this.segmentMetadata = parseLedgerMetadata(metadataBytes);
 
         for (int i = 0; i < indexEntryCount; i++) {