You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2021/12/06 11:23:15 UTC
[hadoop] branch branch-2.10 updated: YARN-9063. ATS 1.5 fails to start if RollingLevelDb files are corrupt or missing (#3728)
This is an automated email from the ASF dual-hosted git repository.
aajisaka pushed a commit to branch branch-2.10
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-2.10 by this push:
new 0b9ae87 YARN-9063. ATS 1.5 fails to start if RollingLevelDb files are corrupt or missing (#3728)
0b9ae87 is described below
commit 0b9ae87ee70d95fe6422c2417a48589a17eddc80
Author: Ashutosh Gupta <as...@st.niituniversity.in>
AuthorDate: Mon Dec 6 16:15:44 2021 +0530
YARN-9063. ATS 1.5 fails to start if RollingLevelDb files are corrupt or missing (#3728)
Signed-off-by: Akira Ajisaka <aa...@apache.org>
(cherry picked from commit 5a950b8900aa143f6db281eb22928965f3b2754f)
Conflicts:
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/TestRollingLevelDBTimelineStore.java
---
.../yarn/server/timeline/LeveldbTimelineStore.java | 18 ++--------
.../timeline/RollingLevelDBTimelineStore.java | 27 +++++++++++----
.../yarn/server/timeline/util/LeveldbUtils.java | 32 ++++++++++++++++++
.../timeline/TestRollingLevelDBTimelineStore.java | 38 +++++++++++++++++++++-
4 files changed, 91 insertions(+), 24 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/LeveldbTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/LeveldbTimelineStore.java
index e3db1dc..ac4d495 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/LeveldbTimelineStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/LeveldbTimelineStore.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.server.timeline;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.commons.collections.map.LRUMap;
-import org.apache.commons.io.FileUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability;
@@ -32,7 +31,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.api.records.timeline.*;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents.EventsOfOneEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelinePutError;
@@ -41,6 +39,7 @@ import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
import org.apache.hadoop.yarn.server.records.Version;
import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager.CheckAcl;
+import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils;
import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.KeyBuilder;
import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.KeyParser;
import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
@@ -48,7 +47,6 @@ import org.fusesource.leveldbjni.JniDBFactory;
import org.iq80.leveldb.*;
import org.slf4j.LoggerFactory;
-import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
@@ -242,19 +240,7 @@ public class LeveldbTimelineStore extends AbstractService
IOUtils.cleanupWithLogger(LOG, localFS);
}
LOG.info("Using leveldb path " + dbPath);
- try {
- db = factory.open(new File(dbPath.toString()), options);
- } catch (IOException ioe) {
- File dbFile = new File(dbPath.toString());
- File backupPath = new File(
- dbPath.toString() + BACKUP_EXT + Time.monotonicNow());
- LOG.warn("Incurred exception while loading LevelDb database. Backing " +
- "up at "+ backupPath, ioe);
- FileUtils.copyDirectory(dbFile, backupPath);
- LOG.warn("Going to try repair");
- factory.repair(dbFile, options);
- db = factory.open(dbFile, options);
- }
+ db = LeveldbUtils.loadOrRepairLevelDb(factory, dbPath, options);
checkVersion();
startTimeWriteCache =
Collections.synchronizedMap(new LRUMap(getStartTimeWriteCacheSize(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/RollingLevelDBTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/RollingLevelDBTimelineStore.java
index 5087db4..38c8d59 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/RollingLevelDBTimelineStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/RollingLevelDBTimelineStore.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.server.timeline;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -62,6 +61,7 @@ import org.apache.hadoop.yarn.server.records.Version;
import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.server.timeline.RollingLevelDB.RollingWriteBatch;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager.CheckAcl;
+import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils;
import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.KeyBuilder;
import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils.KeyParser;
@@ -199,6 +199,11 @@ public class RollingLevelDBTimelineStore extends AbstractService implements
static final String STARTTIME = "starttime-ldb";
static final String OWNER = "owner-ldb";
+ @VisibleForTesting
+ //Extension to FILENAME where backup will be stored in case we need to
+ //call LevelDb recovery
+ static final String BACKUP_EXT = ".backup-";
+
private static final byte[] DOMAIN_ID_COLUMN = "d".getBytes(UTF_8);
private static final byte[] EVENTS_COLUMN = "e".getBytes(UTF_8);
private static final byte[] PRIMARY_FILTERS_COLUMN = "f".getBytes(UTF_8);
@@ -240,6 +245,12 @@ public class RollingLevelDBTimelineStore extends AbstractService implements
super(RollingLevelDBTimelineStore.class.getName());
}
+ private JniDBFactory factory;
+ @VisibleForTesting
+ void setFactory(JniDBFactory fact) {
+ this.factory = fact;
+ }
+
@Override
@SuppressWarnings("unchecked")
protected void serviceInit(Configuration conf) throws Exception {
@@ -284,7 +295,9 @@ public class RollingLevelDBTimelineStore extends AbstractService implements
options.cacheSize(conf.getLong(
TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE,
DEFAULT_TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE));
- JniDBFactory factory = new JniDBFactory();
+ if(factory == null) {
+ factory = new JniDBFactory();
+ }
Path dbPath = new Path(
conf.get(TIMELINE_SERVICE_LEVELDB_PATH), FILENAME);
Path domainDBPath = new Path(dbPath, DOMAIN);
@@ -327,13 +340,13 @@ public class RollingLevelDBTimelineStore extends AbstractService implements
TIMELINE_SERVICE_LEVELDB_WRITE_BUFFER_SIZE,
DEFAULT_TIMELINE_SERVICE_LEVELDB_WRITE_BUFFER_SIZE));
LOG.info("Using leveldb path " + dbPath);
- domaindb = factory.open(new File(domainDBPath.toString()), options);
+ domaindb = LeveldbUtils.loadOrRepairLevelDb(factory, domainDBPath, options);
entitydb = new RollingLevelDB(ENTITY);
entitydb.init(conf);
indexdb = new RollingLevelDB(INDEX);
indexdb.init(conf);
- starttimedb = factory.open(new File(starttimeDBPath.toString()), options);
- ownerdb = factory.open(new File(ownerDBPath.toString()), options);
+ starttimedb = LeveldbUtils.loadOrRepairLevelDb(factory, starttimeDBPath, options);
+ ownerdb = LeveldbUtils.loadOrRepairLevelDb(factory, ownerDBPath, options);
checkVersion();
startTimeWriteCache = Collections.synchronizedMap(new LRUMap(
getStartTimeWriteCacheSize(conf)));
@@ -346,7 +359,7 @@ public class RollingLevelDBTimelineStore extends AbstractService implements
super.serviceInit(conf);
}
-
+
@Override
protected void serviceStart() throws Exception {
if (getConfig().getBoolean(TIMELINE_SERVICE_TTL_ENABLE, true)) {
@@ -1828,4 +1841,4 @@ public class RollingLevelDBTimelineStore extends AbstractService implements
return domain;
}
}
-}
+}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/util/LeveldbUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/util/LeveldbUtils.java
index 82c7f26..fcdb2ac 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/util/LeveldbUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/util/LeveldbUtils.java
@@ -19,16 +19,30 @@
package org.apache.hadoop.yarn.server.timeline.util;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Time;
+import java.io.File;
import java.io.IOException;
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.iq80.leveldb.DB;
+import org.iq80.leveldb.Options;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.hadoop.yarn.server.timeline.GenericObjectMapper.readReverseOrderedLong;
public class LeveldbUtils {
+ private static final String BACKUP_EXT = ".backup-";
+ private static final Logger LOG = LoggerFactory
+ .getLogger(LeveldbUtils.class);
+
/** A string builder utility for building timeline server leveldb keys. */
public static class KeyBuilder {
/** Maximum subkeys that can be added to construct a key. */
@@ -184,4 +198,22 @@ public class LeveldbUtils {
public static final FsPermission LEVELDB_DIR_UMASK = FsPermission
.createImmutable((short) 0700);
+ public static DB loadOrRepairLevelDb(JniDBFactory factory, Path dbPath, Options options)
+ throws IOException {
+ DB db;
+ try{
+ db = factory.open(new File(dbPath.toString()), options);
+ } catch (IOException ioe){
+ File dbFile = new File(dbPath.toString());
+ File dbBackupPath = new File(
+ dbPath.toString() + BACKUP_EXT + Time.monotonicNow());
+ LOG.warn("Incurred exception while loading LevelDb database. Backing " +
+ "up at "+ dbBackupPath, ioe);
+ FileUtils.copyDirectory(dbFile, dbBackupPath);
+ factory.repair(dbFile, options);
+ db = factory.open(dbFile, options);
+ }
+ return db;
+ }
+
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/TestRollingLevelDBTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/TestRollingLevelDBTimelineStore.java
index 956e9e9..0fc3fe4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/TestRollingLevelDBTimelineStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/TestRollingLevelDBTimelineStore.java
@@ -21,8 +21,10 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.io.File;
+import java.io.FilenameFilter;
import java.io.IOException;
+import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -38,11 +40,15 @@ import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelinePutError;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.records.Version;
+
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.iq80.leveldb.Options;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mortbay.log.Log;
+import org.mockito.Mockito;
/** Test class to verify RollingLevelDBTimelineStore. */
@InterfaceAudience.Private
@@ -417,6 +423,36 @@ public class TestRollingLevelDBTimelineStore extends TimelineStoreTestUtils {
Log.info("Duration for " + num + ": " + duration);
}
+ @Test
+ /**
+ * Test that RollingLevelDb repair is attempted at least once during
+ * serviceInit for RollingLeveldbTimelineStore in case open fails the
+ * first time.
+ */ public void testLevelDbRepair() throws IOException {
+ RollingLevelDBTimelineStore store = new RollingLevelDBTimelineStore();
+ JniDBFactory factory = Mockito.mock(JniDBFactory.class);
+ Mockito.when(factory.open(Mockito.any(File.class), Mockito.any(Options.class)))
+ .thenThrow(new IOException()).thenCallRealMethod();
+ store.setFactory(factory);
+
+ //Create the LevelDb in a different location
+ File path = new File("target", this.getClass().getSimpleName() + "-tmpDir2").getAbsoluteFile();
+ Configuration conf = new Configuration(this.config);
+ conf.set(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH, path.getAbsolutePath());
+ try {
+ store.init(conf);
+ Mockito.verify(factory, Mockito.times(1))
+ .repair(Mockito.any(File.class), Mockito.any(Options.class));
+ FilenameFilter fileFilter =
+ new WildcardFileFilter("*" + RollingLevelDBTimelineStore.BACKUP_EXT + "*");
+ Assert.assertTrue(new File(path.getAbsolutePath(), RollingLevelDBTimelineStore.FILENAME)
+ .list(fileFilter).length > 0);
+ } finally {
+ store.close();
+ fsContext.delete(new Path(path.getAbsolutePath()), true);
+ }
+ }
+
public static void main(String[] args) throws Exception {
TestRollingLevelDBTimelineStore store =
new TestRollingLevelDBTimelineStore();
@@ -424,4 +460,4 @@ public class TestRollingLevelDBTimelineStore extends TimelineStoreTestUtils {
store.testStorePerformance();
store.tearDown();
}
-}
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org