You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/04/25 11:35:47 UTC

[incubator-doris] branch master updated: [Enhancement] (image) check image validity as soon as generated (#9011)

This is an automated email from the ASF dual-hosted git repository.

lide pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new bdf915abd4 [Enhancement] (image) check image validity as soon as generated (#9011)
bdf915abd4 is described below

commit bdf915abd4e0d65e913d4caf9a8ea3ced88e85ab
Author: Henry2SS <45...@users.noreply.github.com>
AuthorDate: Mon Apr 25 19:35:41 2022 +0800

    [Enhancement] (image) check image validity as soon as generated (#9011)
    
    * load the newly generated image file as soon as it is generated to check that it is valid.
    
    * delete the latest invalid image file
    
    * fix
    
    * fix
    
    * get filePath from saveImage() to ensure the correct file is deleted when an exception happens
    
    * fix
    
    Co-authored-by: wuhangze <wu...@jd.com>
---
 .../java/org/apache/doris/catalog/Catalog.java     |  4 ++-
 .../java/org/apache/doris/master/Checkpoint.java   | 34 +++++++++++++++++++++-
 .../java/org/apache/doris/persist/MetaCleaner.java | 11 +++++++
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
index 09a06a1596..e5e2914857 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
@@ -1950,7 +1950,8 @@ public class Catalog {
     }
 
     // Only called by checkpoint thread
-    public void saveImage() throws IOException {
+    // return the latest image file's absolute path
+    public String saveImage() throws IOException {
         // Write image.ckpt
         Storage storage = new Storage(this.imageDir);
         File curFile = storage.getImageFile(replayedJournalId.get());
@@ -1963,6 +1964,7 @@ public class Catalog {
             curFile.delete();
             throw new IOException();
         }
+        return curFile.getAbsolutePath();
     }
 
     public void saveImage(File curFile, long replayedJournalId) throws IOException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
index 7865817a82..2d3c3ac271 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
@@ -35,6 +35,8 @@ import org.apache.doris.system.Frontend;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import com.google.common.base.Strings;
+
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.HttpURLConnection;
@@ -111,6 +113,8 @@ public class Checkpoint extends MasterDaemon {
         catalog = Catalog.getCurrentCatalog();
         catalog.setEditLog(editLog);
         createStaticFieldForCkpt();
+        boolean exceptionCaught = false;
+        String latestImageFilePath = null;
         try {
             catalog.loadImage(imageDir);
             catalog.replayJournal(checkPointVersion);
@@ -119,13 +123,25 @@ public class Checkpoint extends MasterDaemon {
                         checkPointVersion, catalog.getReplayedJournalId()));
             }
             catalog.fixBugAfterMetadataReplayed(false);
-            catalog.saveImage();
+            latestImageFilePath = catalog.saveImage();
             replayedJournalId = catalog.getReplayedJournalId();
+
+            // destroy checkpoint catalog, reclaim memory
+            catalog = null;
+            Catalog.destroyCheckpoint();
+            destroyStaticFieldForCkpt();
+
+            // Load image to verify if the newly generated image file is valid
+            // If success, do all the following jobs
+            // If failed, just return
+            catalog = Catalog.getCurrentCatalog();
+            catalog.loadImage(imageDir);
             if (MetricRepo.isInit) {
                 MetricRepo.COUNTER_IMAGE_WRITE_SUCCESS.increase(1L);
             }
             LOG.info("checkpoint finished save image.{}", replayedJournalId);
         } catch (Throwable e) {
+            exceptionCaught = true;
             e.printStackTrace();
             LOG.error("Exception when generate new image file", e);
             if (MetricRepo.isInit) {
@@ -137,6 +153,22 @@ public class Checkpoint extends MasterDaemon {
             catalog = null;
             Catalog.destroyCheckpoint();
             destroyStaticFieldForCkpt();
+            // If a new image file was generated but an exception was caught afterwards,
+            // delete that newest image file here, because it is invalid.
+            if ((!Strings.isNullOrEmpty(latestImageFilePath)) && exceptionCaught) {
+                MetaCleaner cleaner = new MetaCleaner(Config.meta_dir + "/image");
+                try {
+                    cleaner.cleanTheLatestInvalidImageFile(latestImageFilePath);
+                    if (MetricRepo.isInit) {
+                        MetricRepo.COUNTER_IMAGE_CLEAN_SUCCESS.increase(1L);
+                    }
+                } catch (Throwable ex) {
+                    LOG.error("Master delete latest invalid image file failed.", ex);
+                    if (MetricRepo.isInit) {
+                        MetricRepo.COUNTER_IMAGE_CLEAN_FAILED.increase(1L);
+                    }
+                }
+            }
         }
 
         // push image file to all the other non master nodes
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java
index 2a7acd1f09..dad0f0d0aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java
@@ -67,6 +67,17 @@ public class MetaCleaner {
             }
         }
     }
+
+    public void cleanTheLatestInvalidImageFile(String path) throws IOException {
+        File latestInvalidImage = new File(path);
+        if (latestInvalidImage.exists()) {
+            if (latestInvalidImage.delete()) {
+                LOG.info(latestInvalidImage.getAbsoluteFile() + " deleted.");
+            } else {
+                LOG.warn(latestInvalidImage.getAbsoluteFile() + " delete failed.");
+            }
+        }
+    }
     
     private String fileType(File file) throws IOException {
         String type = null;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org