You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uniffle.apache.org by ro...@apache.org on 2023/03/03 12:16:49 UTC
[incubator-uniffle] branch master updated: [#675] fix: filter no space exception in checkStorageReadAndWrite (#677)
This is an automated email from the ASF dual-hosted git repository.
roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new eb14ae45 [#675] fix: filter no space exception in checkStorageReadAndWrite (#677)
eb14ae45 is described below
commit eb14ae45fc7e9b83f4370b18c7a684ef23479c5e
Author: xumanbu <ma...@163.com>
AuthorDate: Fri Mar 3 20:16:44 2023 +0800
[#675] fix: filter no space exception in checkStorageReadAndWrite (#677)
### What changes were proposed in this pull request?
Filter no space exception in checkStorageReadAndWrite
### Why are the changes needed?
Fix: #675
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
UT
Co-authored-by: jam.xu <ja...@vipshop.com>
---
.../org/apache/uniffle/common/util/Constants.java | 1 +
.../apache/uniffle/server/LocalStorageChecker.java | 6 ++
.../apache/uniffle/server/StorageCheckerTest.java | 84 ++++++++++++++++++++++
3 files changed, 91 insertions(+)
diff --git a/common/src/main/java/org/apache/uniffle/common/util/Constants.java b/common/src/main/java/org/apache/uniffle/common/util/Constants.java
index 960ca664..b62d5b03 100644
--- a/common/src/main/java/org/apache/uniffle/common/util/Constants.java
+++ b/common/src/main/java/org/apache/uniffle/common/util/Constants.java
@@ -70,4 +70,5 @@ public final class Constants {
public static final double MR_SLOW_START_DEFAULT_VALUE = 0.05;
public static final double MILLION_SECONDS_PER_SECOND = 1E3D;
+ public static final String DEVICE_NO_SPACE_ERROR_MESSAGE = "No space left on device";
}
diff --git a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
index e833441a..368db950 100644
--- a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
+++ b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
@@ -35,6 +35,8 @@ import org.apache.uniffle.common.util.RssUtils;
import org.apache.uniffle.storage.common.LocalStorage;
import org.apache.uniffle.storage.util.ShuffleStorageUtils;
+import static org.apache.uniffle.common.util.Constants.DEVICE_NO_SPACE_ERROR_MESSAGE;
+
public class LocalStorageChecker extends Checker {
private static final Logger LOG = LoggerFactory.getLogger(LocalStorageChecker.class);
@@ -200,6 +202,10 @@ public class LocalStorageChecker extends Checker {
}
} catch (Exception e) {
LOG.error("Storage read and write error. Storage dir: {}", storageDir, e);
+ // avoid check bad track failure due to lack of disk space
+ if (e.getMessage() != null && DEVICE_NO_SPACE_ERROR_MESSAGE.equals(e.getMessage())) {
+ return true;
+ }
return false;
} finally {
try {
diff --git a/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java b/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java
index c712e989..889bebda 100644
--- a/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java
+++ b/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java
@@ -18,10 +18,12 @@
package org.apache.uniffle.server;
import java.io.File;
+import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import com.google.common.collect.Lists;
+import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
@@ -30,6 +32,7 @@ import org.junit.jupiter.api.io.TempDir;
import org.apache.uniffle.storage.common.LocalStorage;
import org.apache.uniffle.storage.util.StorageType;
+import static org.apache.uniffle.common.util.Constants.DEVICE_NO_SPACE_ERROR_MESSAGE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -102,6 +105,19 @@ public class StorageCheckerTest {
assertEquals(250, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());
+
+ storages.clear();
+ storages.add(LocalStorage.newBuilder().basePath(st1).build());
+ checker = new MockNoSpaceStorageChecker(conf, storages);
+ assertTrue(checker.checkIsHealthy());
+ storages.clear();
+ storages.add(LocalStorage.newBuilder().basePath(st2).build());
+ checker = new MockNoSpaceStorageChecker(conf, storages);
+ assertTrue(checker.checkIsHealthy());
+ storages.clear();
+ storages.add(LocalStorage.newBuilder().basePath(st3).build());
+ checker = new MockNoSpaceStorageChecker(conf, storages);
+ assertFalse(checker.checkIsHealthy());
}
private class MockStorageChecker extends LocalStorageChecker {
@@ -173,5 +189,73 @@ public class StorageCheckerTest {
}
return result;
}
+
+ @Override
+ public boolean checkIsHealthy() {
+ return super.checkIsHealthy();
+ }
}
+
+ private class MockNoSpaceStorageChecker extends LocalStorageChecker {
+
+ private final List<MockNoSpaceStorageInfo> mockNoSpaceStorageInfos = Lists.newArrayList();
+
+ MockNoSpaceStorageChecker(ShuffleServerConf conf, List<LocalStorage> storages) {
+ super(conf, storages);
+ for (LocalStorage storage : storages) {
+ mockNoSpaceStorageInfos.add(new MockNoSpaceStorageInfo(storage));
+ }
+ }
+
+ @Override
+ public boolean checkIsHealthy() {
+ for (StorageInfo storageInfo : mockNoSpaceStorageInfos) {
+ if (!storageInfo.checkStorageReadAndWrite()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private class MockNoSpaceStorageInfo extends StorageInfo {
+
+ private final File storageDir;
+ private final LocalStorage storage;
+
+ MockNoSpaceStorageInfo(LocalStorage storage) {
+ super(storage);
+ this.storageDir = new File(storage.getBasePath());
+ this.storage = storage;
+ }
+
+ @Override
+ boolean checkStorageReadAndWrite() {
+ if (storage.isCorrupted()) {
+ return false;
+ }
+ File checkDir = new File(storageDir, CHECKER_DIR_NAME);
+ try {
+ if (storage.getBasePath().contains("st2")) {
+ throw new IOException("No space left on device");
+ }
+ if (storage.getBasePath().contains("st3")) {
+ throw new IOException("mock");
+ }
+ } catch (Exception e) {
+ if (e.getMessage() != null && DEVICE_NO_SPACE_ERROR_MESSAGE.equals(e.getMessage())) {
+ return true;
+ }
+ return false;
+ } finally {
+ try {
+ FileUtils.deleteDirectory(checkDir);
+ } catch (IOException ioe) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+ }
+
}