You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uniffle.apache.org by ro...@apache.org on 2023/03/03 12:16:49 UTC

[incubator-uniffle] branch master updated: [#675] fix: filter no space exception in checkStorageReadAndWrite (#677)

This is an automated email from the ASF dual-hosted git repository.

roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new eb14ae45 [#675] fix: filter no space exception in checkStorageReadAndWrite (#677)
eb14ae45 is described below

commit eb14ae45fc7e9b83f4370b18c7a684ef23479c5e
Author: xumanbu <ma...@163.com>
AuthorDate: Fri Mar 3 20:16:44 2023 +0800

    [#675] fix: filter no space exception in checkStorageReadAndWrite (#677)
    
    ### What changes were proposed in this pull request?
    
    Filter no space exception in checkStorageReadAndWrite
    
    ### Why are the changes needed?
    
    Fix: #675
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    UT
    
    Co-authored-by: jam.xu <ja...@vipshop.com>
---
 .../org/apache/uniffle/common/util/Constants.java  |  1 +
 .../apache/uniffle/server/LocalStorageChecker.java |  6 ++
 .../apache/uniffle/server/StorageCheckerTest.java  | 84 ++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/common/src/main/java/org/apache/uniffle/common/util/Constants.java b/common/src/main/java/org/apache/uniffle/common/util/Constants.java
index 960ca664..b62d5b03 100644
--- a/common/src/main/java/org/apache/uniffle/common/util/Constants.java
+++ b/common/src/main/java/org/apache/uniffle/common/util/Constants.java
@@ -70,4 +70,5 @@ public final class Constants {
   public static final double MR_SLOW_START_DEFAULT_VALUE = 0.05;
   
   public static final double MILLION_SECONDS_PER_SECOND = 1E3D;
+  public static final String DEVICE_NO_SPACE_ERROR_MESSAGE = "No space left on device";
 }
diff --git a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
index e833441a..368db950 100644
--- a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
+++ b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
@@ -35,6 +35,8 @@ import org.apache.uniffle.common.util.RssUtils;
 import org.apache.uniffle.storage.common.LocalStorage;
 import org.apache.uniffle.storage.util.ShuffleStorageUtils;
 
+import static org.apache.uniffle.common.util.Constants.DEVICE_NO_SPACE_ERROR_MESSAGE;
+
 public class LocalStorageChecker extends Checker {
 
   private static final Logger LOG = LoggerFactory.getLogger(LocalStorageChecker.class);
@@ -200,6 +202,10 @@ public class LocalStorageChecker extends Checker {
         }
       } catch (Exception e) {
         LOG.error("Storage read and write error. Storage dir: {}", storageDir, e);
+        // avoid check bad track failure due to lack of disk space
+        if (e.getMessage() !=  null && DEVICE_NO_SPACE_ERROR_MESSAGE.equals(e.getMessage())) {
+          return true;
+        }
         return false;
       } finally {
         try {
diff --git a/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java b/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java
index c712e989..889bebda 100644
--- a/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java
+++ b/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java
@@ -18,10 +18,12 @@
 package org.apache.uniffle.server;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 
 import com.google.common.collect.Lists;
+import org.apache.commons.io.FileUtils;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
@@ -30,6 +32,7 @@ import org.junit.jupiter.api.io.TempDir;
 import org.apache.uniffle.storage.common.LocalStorage;
 import org.apache.uniffle.storage.util.StorageType;
 
+import static org.apache.uniffle.common.util.Constants.DEVICE_NO_SPACE_ERROR_MESSAGE;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -102,6 +105,19 @@ public class StorageCheckerTest {
     assertEquals(250, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
     assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
     assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());
+
+    storages.clear();
+    storages.add(LocalStorage.newBuilder().basePath(st1).build());
+    checker = new MockNoSpaceStorageChecker(conf, storages);
+    assertTrue(checker.checkIsHealthy());
+    storages.clear();
+    storages.add(LocalStorage.newBuilder().basePath(st2).build());
+    checker = new MockNoSpaceStorageChecker(conf, storages);
+    assertTrue(checker.checkIsHealthy());
+    storages.clear();
+    storages.add(LocalStorage.newBuilder().basePath(st3).build());
+    checker = new MockNoSpaceStorageChecker(conf, storages);
+    assertFalse(checker.checkIsHealthy());
   }
 
   private class MockStorageChecker extends LocalStorageChecker {
@@ -173,5 +189,73 @@ public class StorageCheckerTest {
       }
       return result;
     }
+
+    @Override
+    public boolean checkIsHealthy() {
+      return super.checkIsHealthy();
+    }
   }
+
+  private class MockNoSpaceStorageChecker extends LocalStorageChecker {
+
+    private final List<MockNoSpaceStorageInfo> mockNoSpaceStorageInfos  = Lists.newArrayList();
+
+    MockNoSpaceStorageChecker(ShuffleServerConf conf, List<LocalStorage> storages) {
+      super(conf, storages);
+      for (LocalStorage storage : storages) {
+        mockNoSpaceStorageInfos.add(new MockNoSpaceStorageInfo(storage));
+      }
+    }
+
+    @Override
+    public boolean checkIsHealthy() {
+      for (StorageInfo storageInfo : mockNoSpaceStorageInfos) {
+        if (!storageInfo.checkStorageReadAndWrite()) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    private class MockNoSpaceStorageInfo extends StorageInfo {
+
+      private final File storageDir;
+      private final LocalStorage storage;
+
+      MockNoSpaceStorageInfo(LocalStorage storage) {
+        super(storage);
+        this.storageDir = new File(storage.getBasePath());
+        this.storage = storage;
+      }
+
+      @Override
+      boolean checkStorageReadAndWrite() {
+        if (storage.isCorrupted()) {
+          return false;
+        }
+        File checkDir = new File(storageDir, CHECKER_DIR_NAME);
+        try {
+          if (storage.getBasePath().contains("st2")) {
+            throw new IOException("No space left on device");
+          }
+          if (storage.getBasePath().contains("st3")) {
+            throw new IOException("mock");
+          }
+        } catch (Exception e) {
+          if (e.getMessage() != null && DEVICE_NO_SPACE_ERROR_MESSAGE.equals(e.getMessage())) {
+            return true;
+          }
+          return false;
+        } finally {
+          try {
+            FileUtils.deleteDirectory(checkDir);
+          } catch (IOException ioe) {
+            return false;
+          }
+        }
+        return true;
+      }
+    }
+  }
+
 }