You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/04/25 11:35:47 UTC
[incubator-doris] branch master updated: [Enhancement] (image) check image validity as soon as generated (#9011)
This is an automated email from the ASF dual-hosted git repository.
lide pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new bdf915abd4 [Enhancement] (image) check image validity as soon as generated (#9011)
bdf915abd4 is described below
commit bdf915abd4e0d65e913d4caf9a8ea3ced88e85ab
Author: Henry2SS <45...@users.noreply.github.com>
AuthorDate: Mon Apr 25 19:35:41 2022 +0800
[Enhancement] (image) check image validity as soon as generated (#9011)
* load the newly generated image file immediately after it is written, to check that it is valid.
* delete the latest invalid image file
* fix
* fix
* get the file path from saveImage() to ensure the correct file is deleted when an exception occurs
* fix
Co-authored-by: wuhangze <wu...@jd.com>
---
.../java/org/apache/doris/catalog/Catalog.java | 4 ++-
.../java/org/apache/doris/master/Checkpoint.java | 34 +++++++++++++++++++++-
.../java/org/apache/doris/persist/MetaCleaner.java | 11 +++++++
3 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
index 09a06a1596..e5e2914857 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
@@ -1950,7 +1950,8 @@ public class Catalog {
}
// Only called by checkpoint thread
- public void saveImage() throws IOException {
+ // return the latest image file's absolute path
+ public String saveImage() throws IOException {
// Write image.ckpt
Storage storage = new Storage(this.imageDir);
File curFile = storage.getImageFile(replayedJournalId.get());
@@ -1963,6 +1964,7 @@ public class Catalog {
curFile.delete();
throw new IOException();
}
+ return curFile.getAbsolutePath();
}
public void saveImage(File curFile, long replayedJournalId) throws IOException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
index 7865817a82..2d3c3ac271 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
@@ -35,6 +35,8 @@ import org.apache.doris.system.Frontend;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import com.google.common.base.Strings;
+
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
@@ -111,6 +113,8 @@ public class Checkpoint extends MasterDaemon {
catalog = Catalog.getCurrentCatalog();
catalog.setEditLog(editLog);
createStaticFieldForCkpt();
+ boolean exceptionCaught = false;
+ String latestImageFilePath = null;
try {
catalog.loadImage(imageDir);
catalog.replayJournal(checkPointVersion);
@@ -119,13 +123,25 @@ public class Checkpoint extends MasterDaemon {
checkPointVersion, catalog.getReplayedJournalId()));
}
catalog.fixBugAfterMetadataReplayed(false);
- catalog.saveImage();
+ latestImageFilePath = catalog.saveImage();
replayedJournalId = catalog.getReplayedJournalId();
+
+ // destroy checkpoint catalog, reclaim memory
+ catalog = null;
+ Catalog.destroyCheckpoint();
+ destroyStaticFieldForCkpt();
+
+ // Load image to verify if the newly generated image file is valid
+ // If success, do all the following jobs
+ // If failed, just return
+ catalog = Catalog.getCurrentCatalog();
+ catalog.loadImage(imageDir);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_IMAGE_WRITE_SUCCESS.increase(1L);
}
LOG.info("checkpoint finished save image.{}", replayedJournalId);
} catch (Throwable e) {
+ exceptionCaught = true;
e.printStackTrace();
LOG.error("Exception when generate new image file", e);
if (MetricRepo.isInit) {
@@ -137,6 +153,22 @@ public class Checkpoint extends MasterDaemon {
catalog = null;
Catalog.destroyCheckpoint();
destroyStaticFieldForCkpt();
+ // if new image generated && exception caught, delete the latest image here
+ // delete the newest image file, cuz it is invalid
+ if ((!Strings.isNullOrEmpty(latestImageFilePath)) && exceptionCaught) {
+ MetaCleaner cleaner = new MetaCleaner(Config.meta_dir + "/image");
+ try {
+ cleaner.cleanTheLatestInvalidImageFile(latestImageFilePath);
+ if (MetricRepo.isInit) {
+ MetricRepo.COUNTER_IMAGE_CLEAN_SUCCESS.increase(1L);
+ }
+ } catch (Throwable ex) {
+ LOG.error("Master delete latest invalid image file failed.", ex);
+ if (MetricRepo.isInit) {
+ MetricRepo.COUNTER_IMAGE_CLEAN_FAILED.increase(1L);
+ }
+ }
+ }
}
// push image file to all the other non master nodes
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java
index 2a7acd1f09..dad0f0d0aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java
@@ -67,6 +67,17 @@ public class MetaCleaner {
}
}
}
+
+ public void cleanTheLatestInvalidImageFile(String path) throws IOException {
+ File latestInvalidImage = new File(path);
+ if (latestInvalidImage.exists()) {
+ if (latestInvalidImage.delete()) {
+ LOG.info(latestInvalidImage.getAbsoluteFile() + " deleted.");
+ } else {
+ LOG.warn(latestInvalidImage.getAbsoluteFile() + " delete failed.");
+ }
+ }
+ }
private String fileType(File file) throws IOException {
String type = null;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org