You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/04/25 09:01:08 UTC

[incubator-doris] branch master updated: avoiding a corrupt image file when there is image.ckpt with non-zero … (#9180)

This is an automated email from the ASF dual-hosted git repository.

lide pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5b9a1a2a5d avoiding a corrupt image file when there is image.ckpt with non-zero … (#9180)
5b9a1a2a5d is described below

commit 5b9a1a2a5dd7c2216452864e9cf17d5534523837
Author: dataroaring <98...@users.noreply.github.com>
AuthorDate: Mon Apr 25 17:01:01 2022 +0800

    avoiding a corrupt image file when there is image.ckpt with non-zero … (#9180)
    
    * avoiding a corrupt image file when there is image.ckpt with non-zero size
    
    For now, saveImage writes data to image.ckpt via an appending FileOutputStream.
    When a non-zero-size file named image.ckpt already exists, a disaster would
    happen due to a corrupt image file. Even worse, fe only keeps the latest image
    file and removes the others.
    
    BTW, image file should be synced to disk.
    
    It is dangerous to only keep the latest image file, because an image file is
    validated when generating the next image file. We would then keep a
    non-validated image file but remove the validated ones. So I will issue a PR
    which keeps at least 2 image files.
    
    * append other data after MetaHeader
    
    * use channel.force instead of sync
---
 fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java   | 9 +++++++--
 fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java | 1 +
 fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java | 5 +++++
 fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java | 8 +++++---
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
index a670dd2aeb..86d6763b24 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
@@ -1966,8 +1966,13 @@ public class Catalog {
     }
 
     public void saveImage(File curFile, long replayedJournalId) throws IOException {
-        if (!curFile.exists()) {
-            curFile.createNewFile();
+        if (curFile.exists()) {
+            if (!curFile.delete()) {
+                throw new IOException(curFile.getName() + " can not be deleted.");
+            }
+        }
+        if (!curFile.createNewFile()) {
+            throw new IOException(curFile.getName() + " can not be created.");
         }
         MetaWriter.write(curFile, this);
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java
index 9df82aa9b7..426cd7b3d8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java
@@ -98,6 +98,7 @@ public class MetaFooter {
             long endIndex = raf.length();
             raf.writeLong(endIndex - startIndex);
             MetaMagicNumber.write(raf);
+            raf.getChannel().force(true);
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java
index 5617a85448..ba91b04b5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java
@@ -75,10 +75,15 @@ public class MetaHeader {
     }
 
     public static long write(File imageFile) throws IOException {
+        if (imageFile.length() != 0) {
+            throw new IOException("Meta header has to be written to an empty file.");
+        }
+
         try (RandomAccessFile raf = new RandomAccessFile(imageFile, "rw")) {
             raf.seek(0);
             MetaMagicNumber.write(raf);
             MetaJsonHeader.write(raf);
+            raf.getChannel().force(true);
             return raf.getFilePointer();
         }
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java
index 6b9d9aa791..387a50be3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java
@@ -94,14 +94,15 @@ public class MetaWriter {
 
     public static void write(File imageFile, Catalog catalog) throws IOException {
         // save image does not need any lock. because only checkpoint thread will call this method.
-        LOG.info("start save image to {}. is ckpt: {}", imageFile.getAbsolutePath(), Catalog.isCheckpointThread());
-
+        LOG.info("start to save image to {}. is ckpt: {}", imageFile.getAbsolutePath(), Catalog.isCheckpointThread());
         final Reference<Long> checksum = new Reference<>(0L);
         long saveImageStartTime = System.currentTimeMillis();
+        // MetaHeader should use output stream in the future.
         long startPosition = MetaHeader.write(imageFile);
         List<MetaIndex> metaIndices = Lists.newArrayList();
+        FileOutputStream imageFileOut = new FileOutputStream(imageFile, true);
         try (CountingDataOutputStream dos = new CountingDataOutputStream(new BufferedOutputStream(
-                new FileOutputStream(imageFile, true)), startPosition)) {
+                imageFileOut), startPosition)) {
             writer.setDelegate(dos, metaIndices);
             long replayedJournalId = catalog.getReplayedJournalId();
             checksum.setRef(writer.doWork("header", () -> catalog.saveHeader(dos, replayedJournalId, checksum.getRef())));
@@ -128,6 +129,7 @@ public class MetaWriter {
             checksum.setRef(writer.doWork("plugins", () -> catalog.savePlugins(dos, checksum.getRef())));
             checksum.setRef(writer.doWork("deleteHandler", () -> catalog.saveDeleteHandler(dos, checksum.getRef())));
             checksum.setRef(writer.doWork("sqlBlockRule", () -> catalog.saveSqlBlockRule(dos, checksum.getRef())));
+            imageFileOut.getChannel().force(true);
         }
         MetaFooter.write(imageFile, metaIndices, checksum.getRef());
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org