You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by yq...@apache.org on 2019/04/03 06:01:56 UTC
[hadoop] branch trunk updated: HDDS-1365. Fix error handling in
KeyValueContainerCheck. Contributed by Supratim Deka.
This is an automated email from the ASF dual-hosted git repository.
yqlin pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new f96fb05 HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka.
f96fb05 is described below
commit f96fb05a2b889f3acdfae60d8f64c755ff48b8c1
Author: Yiqun Lin <yq...@apache.org>
AuthorDate: Wed Apr 3 14:01:30 2019 +0800
HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka.
---
.../container/keyvalue/KeyValueContainerCheck.java | 285 ++++++---------------
.../keyvalue/TestKeyValueContainerCheck.java | 11 +-
2 files changed, 81 insertions(+), 215 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
index b1ab1e1..bdfdf21 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
@@ -68,9 +68,26 @@ public class KeyValueContainerCheck {
}
/**
- * fast checks are basic and do not look inside the metadata files.
- * Or into the structures on disk. These checks can be done on Open
- * containers as well without concurrency implications
+ * Run basic integrity checks on container metadata.
+ * These checks do not look inside the metadata files.
+ * Applicable for OPEN containers.
+ *
+ * @return true : corruption detected, false : no corruption.
+ */
+ public boolean fastCheck() {
+ boolean corruption = false;
+ try {
+ basicChecks();
+
+ } catch (IOException e) {
+ handleCorruption(e);
+ corruption = true;
+ }
+
+ return corruption;
+ }
+
+ /**
* Checks :
* 1. check directory layout
* 2. check container file
@@ -78,24 +95,14 @@ public class KeyValueContainerCheck {
* @return void
*/
- public KvCheckError fastCheck() {
-
- KvCheckError error;
- LOG.trace("Running fast check for container {};", containerID);
-
- error = loadContainerData();
- if (error != KvCheckError.ERROR_NONE) {
- return error;
- }
+ private void basicChecks() throws IOException {
- error = checkLayout();
- if (error != KvCheckError.ERROR_NONE) {
- return error;
- }
+ LOG.trace("Running basic checks for container {};", containerID);
- error = checkContainerFile();
+ loadContainerData();
- return error;
+ checkLayout();
+ checkContainerFile();
}
/**
@@ -107,129 +114,80 @@ public class KeyValueContainerCheck {
* <p>
* fullCheck is a superset of fastCheck
*
- * @return void
+ * @return true : corruption detected, false : no corruption.
*/
- public KvCheckError fullCheck() {
- /**
-
- */
- KvCheckError error;
+ public boolean fullCheck() {
+ boolean corruption = false;
- error = fastCheck();
- if (error != KvCheckError.ERROR_NONE) {
+ try {
+ basicChecks();
+ checkBlockDB();
- LOG.trace("fastCheck failed, aborting full check for Container {}",
- containerID);
- return error;
+ } catch (IOException e) {
+ handleCorruption(e);
+ corruption = true;
}
- error = checkBlockDB();
-
- return error;
+ return corruption;
}
/**
* Check the integrity of the directory structure of the container.
- *
- * @return error code or ERROR_NONE
*/
- private KvCheckError checkLayout() {
- boolean success;
- KvCheckError error = KvCheckError.ERROR_NONE;
+ private void checkLayout() throws IOException {
// is metadataPath accessible as a directory?
- try {
- checkDirPath(metadataPath);
- } catch (IOException ie) {
- error = KvCheckError.METADATA_PATH_ACCESS;
- handleCorruption(ie.getMessage(), error, ie);
- return error;
- }
+ checkDirPath(metadataPath);
- String chunksPath = onDiskContainerData.getChunksPath();
// is chunksPath accessible as a directory?
- try {
- checkDirPath(chunksPath);
- } catch (IOException ie) {
- error = KvCheckError.CHUNKS_PATH_ACCESS;
- handleCorruption(ie.getMessage(), error, ie);
- return error;
- }
-
- return error;
+ String chunksPath = onDiskContainerData.getChunksPath();
+ checkDirPath(chunksPath);
}
private void checkDirPath(String path) throws IOException {
File dirPath = new File(path);
String errStr = null;
- boolean success = true;
try {
if (!dirPath.isDirectory()) {
- success = false;
errStr = "Not a directory [" + path + "]";
+ throw new IOException(errStr);
}
} catch (SecurityException se) {
throw new IOException("Security exception checking dir ["
+ path + "]", se);
- } catch (Exception e) {
- throw new IOException("Generic exception checking dir ["
- + path + "]", e);
- }
-
- try {
- String[] ls = dirPath.list();
- if (ls == null) {
- // null result implies operation failed
- success = false;
- errStr = "null listing for directory [" + path + "]";
- }
- } catch (Exception e) {
- throw new IOException("Exception listing dir [" + path + "]", e);
}
- if (!success) {
- Preconditions.checkState(errStr != null);
+ String[] ls = dirPath.list();
+ if (ls == null) {
+ // null result implies operation failed
+ errStr = "null listing for directory [" + path + "]";
throw new IOException(errStr);
}
}
- private KvCheckError checkContainerFile() {
+ private void checkContainerFile() throws IOException {
/**
* compare the values in the container file loaded from disk,
* with the values we are expecting
*/
- KvCheckError error = KvCheckError.ERROR_NONE;
String dbType;
Preconditions
.checkState(onDiskContainerData != null, "Container File not loaded");
- KvCheckAction next;
- try {
- ContainerUtils.verifyChecksum(onDiskContainerData);
- } catch (Exception e) {
- error = KvCheckError.CONTAINERDATA_CKSUM;
- handleCorruption("Container File Checksum mismatch", error, e);
- return error;
- }
+ ContainerUtils.verifyChecksum(onDiskContainerData);
if (onDiskContainerData.getContainerType()
!= ContainerProtos.ContainerType.KeyValueContainer) {
String errStr = "Bad Container type in Containerdata for " + containerID;
- error = KvCheckError.CONTAINERDATA_TYPE;
- handleCorruption(errStr, error, null);
- return error; // Abort if we do not know the type of Container
+ throw new IOException(errStr);
}
if (onDiskContainerData.getContainerID() != containerID) {
String errStr =
"Bad ContainerID field in Containerdata for " + containerID;
- error = KvCheckError.CONTAINERDATA_ID;
- next = handleCorruption(errStr, error, null);
- if (next == KvCheckAction.ABORT) {
- return error;
- } // else continue checking other data elements
+ throw new IOException(errStr);
}
dbType = onDiskContainerData.getContainerDBType();
@@ -237,9 +195,7 @@ public class KeyValueContainerCheck {
!dbType.equals(OZONE_METADATA_STORE_IMPL_LEVELDB)) {
String errStr = "Unknown DBType [" + dbType
+ "] in Container File for [" + containerID + "]";
- error = KvCheckError.CONTAINERDATA_DBTYPE;
- handleCorruption(errStr, error, null);
- return error;
+ throw new IOException(errStr);
}
KeyValueContainerData kvData = onDiskContainerData;
@@ -248,17 +204,11 @@ public class KeyValueContainerCheck {
"Bad metadata path in Containerdata for " + containerID + "Expected ["
+ metadataPath.toString() + "] Got [" + kvData.getMetadataPath()
+ "]";
- error = KvCheckError.CONTAINERDATA_METADATA_PATH;
- next = handleCorruption(errStr, error, null);
- if (next == KvCheckAction.ABORT) {
- return error;
- }
+ throw new IOException(errStr);
}
-
- return error;
}
- private KvCheckError checkBlockDB() {
+ private void checkBlockDB() throws IOException {
/**
* Check the integrity of the DB inside each container.
* In Scope:
@@ -269,52 +219,31 @@ public class KeyValueContainerCheck {
* 1. chunk checksum verification. this is left to a separate
* slow chunk scanner
*/
- KvCheckError error;
Preconditions.checkState(onDiskContainerData != null,
"invoke loadContainerData prior to calling this function");
File dbFile;
File metaDir = new File(metadataPath);
- try {
- dbFile = KeyValueContainerLocationUtil
- .getContainerDBFile(metaDir, containerID);
-
- if (!dbFile.exists() || !dbFile.canRead()) {
+ dbFile = KeyValueContainerLocationUtil
+ .getContainerDBFile(metaDir, containerID);
- String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString()
- + "] for Container [" + containerID + "] metadata path ["
- + metadataPath + "]";
- error = KvCheckError.DB_ACCESS;
- handleCorruption(dbFileErrorMsg, error, null);
- return error;
- }
- } catch (Exception e) {
- String dbFileErrorMessage =
- "Exception when initializing DBFile" + "with metadatapath ["
- + metadataPath + "] for Container [" + containerID
- + "]";
- error = KvCheckError.DB_ACCESS;
- handleCorruption(dbFileErrorMessage, error, e);
- return error;
+ if (!dbFile.exists() || !dbFile.canRead()) {
+ String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString()
+ + "] for Container [" + containerID + "] metadata path ["
+ + metadataPath + "]";
+ throw new IOException(dbFileErrorMsg);
}
- onDiskContainerData.setDbFile(dbFile);
- try {
- MetadataStore db = BlockUtils
- .getDB(onDiskContainerData, checkConfig);
- error = iterateBlockDB(db);
- } catch (Exception e) {
- error = KvCheckError.DB_ITERATOR;
- handleCorruption("Block DB Iterator aborted", error, e);
- return error;
- }
- return error;
+ onDiskContainerData.setDbFile(dbFile);
+ MetadataStore db = BlockUtils
+ .getDB(onDiskContainerData, checkConfig);
+
+ iterateBlockDB(db);
}
- private KvCheckError iterateBlockDB(MetadataStore db)
+ private void iterateBlockDB(MetadataStore db)
throws IOException {
- KvCheckError error = KvCheckError.ERROR_NONE;
Preconditions.checkState(db != null);
// get "normal" keys from the Block DB
@@ -328,103 +257,39 @@ public class KeyValueContainerCheck {
List<ContainerProtos.ChunkInfo> chunkInfoList = block.getChunks();
for (ContainerProtos.ChunkInfo chunk : chunkInfoList) {
File chunkFile;
- try {
- chunkFile = ChunkUtils
- .getChunkFile(onDiskContainerData,
- ChunkInfo.getFromProtoBuf(chunk));
- } catch (Exception e) {
- error = KvCheckError.MISSING_CHUNK_FILE;
- handleCorruption("Unable to access chunk path", error, e);
- return error;
- }
+ chunkFile = ChunkUtils.getChunkFile(onDiskContainerData,
+ ChunkInfo.getFromProtoBuf(chunk));
if (!chunkFile.exists()) {
- error = KvCheckError.MISSING_CHUNK_FILE;
-
// concurrent mutation in Block DB? lookup the block again.
byte[] bdata = db.get(
Longs.toByteArray(block.getBlockID().getLocalID()));
if (bdata == null) {
LOG.trace("concurrency with delete, ignoring deleted block");
- error = KvCheckError.ERROR_NONE;
break; // skip to next block from kvIter
} else {
- handleCorruption("Missing chunk file", error, null);
- return error;
+ String errorStr = "Missing chunk file "
+ + chunkFile.getAbsolutePath();
+ throw new IOException(errorStr);
}
}
}
}
-
- return error;
}
- private KvCheckError loadContainerData() {
- KvCheckError error = KvCheckError.ERROR_NONE;
+ private void loadContainerData() throws IOException {
File containerFile = KeyValueContainer
.getContainerFile(metadataPath.toString(), containerID);
- try {
- onDiskContainerData = (KeyValueContainerData) ContainerDataYaml
- .readContainerFile(containerFile);
- } catch (IOException e) {
- error = KvCheckError.FILE_LOAD;
- handleCorruption("Unable to load Container File", error, e);
- }
-
- return error;
+ onDiskContainerData = (KeyValueContainerData) ContainerDataYaml
+ .readContainerFile(containerFile);
}
- private KvCheckAction handleCorruption(String reason,
- KvCheckError error, Exception e) {
-
- // XXX HDDS-1201 need to implement corruption handling/reporting
-
+ private void handleCorruption(IOException e) {
String errStr =
- "Corruption detected in container: [" + containerID + "] reason: ["
- + reason + "] error code: [" + error + "]";
- String logMessage = null;
-
- StackTraceElement[] stackeElems = Thread.currentThread().getStackTrace();
- String caller =
- "Corruption reported from Source File: [" + stackeElems[2].getFileName()
- + "] Line: [" + stackeElems[2].getLineNumber() + "]";
-
- if (e != null) {
- logMessage = errStr + " exception: [" + e.getMessage() + "]";
- e.printStackTrace();
- } else {
- logMessage = errStr;
- }
-
- LOG.error(caller);
+ "Corruption detected in container: [" + containerID + "] ";
+ String logMessage = errStr + "Exception: [" + e.getMessage() + "]";
LOG.error(logMessage);
-
- return KvCheckAction.ABORT;
- }
-
- /**
- * Pre-defined error codes for Container Metadata check.
- */
- public enum KvCheckError {
- ERROR_NONE,
- FILE_LOAD, // unable to load container metafile
- METADATA_PATH_ACCESS, // metadata path is not accessible
- CHUNKS_PATH_ACCESS, // chunks path is not accessible
- CONTAINERDATA_ID, // bad Container-ID stored in Container file
- CONTAINERDATA_METADATA_PATH, // bad metadata path in Container file
- CONTAINERDATA_CHUNKS_PATH, // bad chunks path in Container file
- CONTAINERDATA_CKSUM, // container file checksum mismatch
- CONTAINERDATA_TYPE, // container file incorrect type of Container
- CONTAINERDATA_DBTYPE, // unknown DB Type specified in Container File
- DB_ACCESS, // unable to load Metastore DB
- DB_ITERATOR, // unable to create block iterator for Metastore DB
- MISSING_CHUNK_FILE // chunk file not found
- }
-
- private enum KvCheckAction {
- CONTINUE, // Continue with remaining checks on the corrupt Container
- ABORT // Abort checks for the container
}
}
\ No newline at end of file
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
index 3395214..0bc1bbc 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
@@ -55,6 +55,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_ROCKSDB;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
@@ -100,7 +101,7 @@ import static org.junit.Assert.assertTrue;
int deletedBlocks = 1;
int normalBlocks = 3;
int chunksPerBlock = 4;
- KeyValueContainerCheck.KvCheckError error;
+ boolean corruption = false;
// test Closed Container
createContainerWithBlocks(containerID, normalBlocks, deletedBlocks, 65536,
@@ -114,14 +115,14 @@ import static org.junit.Assert.assertTrue;
containerID);
// first run checks on a Open Container
- error = kvCheck.fastCheck();
- assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE);
+ corruption = kvCheck.fastCheck();
+ assertFalse(corruption);
container.close();
// next run checks on a Closed Container
- error = kvCheck.fullCheck();
- assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE);
+ corruption = kvCheck.fullCheck();
+ assertFalse(corruption);
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org