You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by yq...@apache.org on 2019/04/03 06:01:56 UTC

[hadoop] branch trunk updated: HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka.

This is an automated email from the ASF dual-hosted git repository.

yqlin pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new f96fb05  HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka.
f96fb05 is described below

commit f96fb05a2b889f3acdfae60d8f64c755ff48b8c1
Author: Yiqun Lin <yq...@apache.org>
AuthorDate: Wed Apr 3 14:01:30 2019 +0800

    HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka.
---
 .../container/keyvalue/KeyValueContainerCheck.java | 285 ++++++---------------
 .../keyvalue/TestKeyValueContainerCheck.java       |  11 +-
 2 files changed, 81 insertions(+), 215 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
index b1ab1e1..bdfdf21 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
@@ -68,9 +68,26 @@ public class KeyValueContainerCheck {
   }
 
   /**
-   * fast checks are basic and do not look inside the metadata files.
-   * Or into the structures on disk. These checks can be done on Open
-   * containers as well without concurrency implications
+   * Run basic integrity checks on container metadata.
+   * These checks do not look inside the metadata files.
+   * Applicable for OPEN containers.
+   *
+   * @return true : corruption detected, false : no corruption.
+   */
+  public boolean fastCheck() {
+    boolean corruption = false;
+    try {
+      basicChecks();
+
+    } catch (IOException e) {
+      handleCorruption(e);
+      corruption = true;
+    }
+
+    return corruption;
+  }
+
+  /**
    * Checks :
    * 1. check directory layout
    * 2. check container file
@@ -78,24 +95,14 @@ public class KeyValueContainerCheck {
    * @return void
    */
 
-  public KvCheckError fastCheck() {
-
-    KvCheckError error;
-    LOG.trace("Running fast check for container {};", containerID);
-
-    error = loadContainerData();
-    if (error != KvCheckError.ERROR_NONE) {
-      return error;
-    }
+  private void basicChecks() throws IOException {
 
-    error = checkLayout();
-    if (error != KvCheckError.ERROR_NONE) {
-      return error;
-    }
+    LOG.trace("Running basic checks for container {};", containerID);
 
-    error = checkContainerFile();
+    loadContainerData();
 
-    return error;
+    checkLayout();
+    checkContainerFile();
   }
 
   /**
@@ -107,129 +114,80 @@ public class KeyValueContainerCheck {
    * <p>
    * fullCheck is a superset of fastCheck
    *
-   * @return void
+   * @return true : corruption detected, false : no corruption.
    */
-  public KvCheckError fullCheck() {
-    /**
-
-     */
-    KvCheckError error;
+  public boolean fullCheck() {
+    boolean corruption = false;
 
-    error = fastCheck();
-    if (error != KvCheckError.ERROR_NONE) {
+    try {
+      basicChecks();
+      checkBlockDB();
 
-      LOG.trace("fastCheck failed, aborting full check for Container {}",
-          containerID);
-      return error;
+    } catch (IOException e) {
+      handleCorruption(e);
+      corruption = true;
     }
 
-    error = checkBlockDB();
-
-    return error;
+    return corruption;
   }
 
   /**
    * Check the integrity of the directory structure of the container.
-   *
-   * @return error code or ERROR_NONE
    */
-  private KvCheckError checkLayout() {
-    boolean success;
-    KvCheckError error = KvCheckError.ERROR_NONE;
+  private void checkLayout() throws IOException {
 
     // is metadataPath accessible as a directory?
-    try {
-      checkDirPath(metadataPath);
-    } catch (IOException ie) {
-      error = KvCheckError.METADATA_PATH_ACCESS;
-      handleCorruption(ie.getMessage(), error, ie);
-      return error;
-    }
+    checkDirPath(metadataPath);
 
-    String chunksPath = onDiskContainerData.getChunksPath();
     // is chunksPath accessible as a directory?
-    try {
-      checkDirPath(chunksPath);
-    } catch (IOException ie) {
-      error = KvCheckError.CHUNKS_PATH_ACCESS;
-      handleCorruption(ie.getMessage(), error, ie);
-      return error;
-    }
-
-    return error;
+    String chunksPath = onDiskContainerData.getChunksPath();
+    checkDirPath(chunksPath);
   }
 
   private void checkDirPath(String path) throws IOException {
 
     File dirPath = new File(path);
     String errStr = null;
-    boolean success = true;
 
     try {
       if (!dirPath.isDirectory()) {
-        success = false;
         errStr = "Not a directory [" + path + "]";
+        throw new IOException(errStr);
       }
     } catch (SecurityException se) {
       throw new IOException("Security exception checking dir ["
           + path + "]", se);
-    } catch (Exception e) {
-      throw new IOException("Generic exception checking dir ["
-          + path + "]", e);
-    }
-
-    try {
-      String[] ls = dirPath.list();
-      if (ls == null) {
-        // null result implies operation failed
-        success = false;
-        errStr = "null listing for directory [" + path + "]";
-      }
-    } catch (Exception e) {
-      throw new IOException("Exception listing dir [" + path + "]", e);
     }
 
-    if (!success) {
-      Preconditions.checkState(errStr != null);
+    String[] ls = dirPath.list();
+    if (ls == null) {
+      // null result implies operation failed
+      errStr = "null listing for directory [" + path + "]";
       throw new IOException(errStr);
     }
   }
 
-  private KvCheckError checkContainerFile() {
+  private void checkContainerFile() throws IOException {
     /**
      * compare the values in the container file loaded from disk,
      * with the values we are expecting
      */
-    KvCheckError error = KvCheckError.ERROR_NONE;
     String dbType;
     Preconditions
         .checkState(onDiskContainerData != null, "Container File not loaded");
-    KvCheckAction next;
 
-    try {
-      ContainerUtils.verifyChecksum(onDiskContainerData);
-    } catch (Exception e) {
-      error = KvCheckError.CONTAINERDATA_CKSUM;
-      handleCorruption("Container File Checksum mismatch", error, e);
-      return error;
-    }
+    ContainerUtils.verifyChecksum(onDiskContainerData);
 
     if (onDiskContainerData.getContainerType()
         != ContainerProtos.ContainerType.KeyValueContainer) {
       String errStr = "Bad Container type in Containerdata for " + containerID;
-      error = KvCheckError.CONTAINERDATA_TYPE;
-      handleCorruption(errStr, error, null);
-      return error; // Abort if we do not know the type of Container
+      throw new IOException(errStr);
     }
 
     if (onDiskContainerData.getContainerID() != containerID) {
       String errStr =
           "Bad ContainerID field in Containerdata for " + containerID;
-      error = KvCheckError.CONTAINERDATA_ID;
-      next = handleCorruption(errStr, error, null);
-      if (next == KvCheckAction.ABORT) {
-        return error;
-      } // else continue checking other data elements
+      throw new IOException(errStr);
     }
 
     dbType = onDiskContainerData.getContainerDBType();
@@ -237,9 +195,7 @@ public class KeyValueContainerCheck {
         !dbType.equals(OZONE_METADATA_STORE_IMPL_LEVELDB)) {
       String errStr = "Unknown DBType [" + dbType
           + "] in Container File for  [" + containerID + "]";
-      error = KvCheckError.CONTAINERDATA_DBTYPE;
-      handleCorruption(errStr, error, null);
-      return error;
+      throw new IOException(errStr);
     }
 
     KeyValueContainerData kvData = onDiskContainerData;
@@ -248,17 +204,11 @@ public class KeyValueContainerCheck {
           "Bad metadata path in Containerdata for " + containerID + "Expected ["
               + metadataPath.toString() + "] Got [" + kvData.getMetadataPath()
               + "]";
-      error = KvCheckError.CONTAINERDATA_METADATA_PATH;
-      next = handleCorruption(errStr, error, null);
-      if (next == KvCheckAction.ABORT) {
-        return error;
-      }
+      throw new IOException(errStr);
     }
-
-    return error;
   }
 
-  private KvCheckError checkBlockDB() {
+  private void checkBlockDB() throws IOException {
     /**
      * Check the integrity of the DB inside each container.
      * In Scope:
@@ -269,52 +219,31 @@ public class KeyValueContainerCheck {
      * 1. chunk checksum verification. this is left to a separate
      * slow chunk scanner
      */
-    KvCheckError error;
     Preconditions.checkState(onDiskContainerData != null,
         "invoke loadContainerData prior to calling this function");
     File dbFile;
     File metaDir = new File(metadataPath);
 
-    try {
-      dbFile = KeyValueContainerLocationUtil
-          .getContainerDBFile(metaDir, containerID);
-
-      if (!dbFile.exists() || !dbFile.canRead()) {
+    dbFile = KeyValueContainerLocationUtil
+        .getContainerDBFile(metaDir, containerID);
 
-        String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString()
-            + "] for Container [" + containerID + "] metadata path ["
-            + metadataPath + "]";
-        error = KvCheckError.DB_ACCESS;
-        handleCorruption(dbFileErrorMsg, error, null);
-        return error;
-      }
-    } catch (Exception e) {
-      String dbFileErrorMessage =
-          "Exception when initializing DBFile" + "with metadatapath ["
-              + metadataPath + "] for Container [" + containerID
-              + "]";
-      error = KvCheckError.DB_ACCESS;
-      handleCorruption(dbFileErrorMessage, error, e);
-      return error;
+    if (!dbFile.exists() || !dbFile.canRead()) {
+      String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString()
+          + "] for Container [" + containerID + "] metadata path ["
+          + metadataPath + "]";
+      throw new IOException(dbFileErrorMsg);
     }
-    onDiskContainerData.setDbFile(dbFile);
 
-    try {
-      MetadataStore db = BlockUtils
-          .getDB(onDiskContainerData, checkConfig);
-      error = iterateBlockDB(db);
-    } catch (Exception e) {
-      error = KvCheckError.DB_ITERATOR;
-      handleCorruption("Block DB Iterator aborted", error, e);
-      return error;
-    }
 
-    return error;
+    onDiskContainerData.setDbFile(dbFile);
+    MetadataStore db = BlockUtils
+        .getDB(onDiskContainerData, checkConfig);
+
+    iterateBlockDB(db);
   }
 
-  private KvCheckError iterateBlockDB(MetadataStore db)
+  private void iterateBlockDB(MetadataStore db)
       throws IOException {
-    KvCheckError error = KvCheckError.ERROR_NONE;
     Preconditions.checkState(db != null);
 
     // get "normal" keys from the Block DB
@@ -328,103 +257,39 @@ public class KeyValueContainerCheck {
       List<ContainerProtos.ChunkInfo> chunkInfoList = block.getChunks();
       for (ContainerProtos.ChunkInfo chunk : chunkInfoList) {
         File chunkFile;
-        try {
-          chunkFile = ChunkUtils
-              .getChunkFile(onDiskContainerData,
-                  ChunkInfo.getFromProtoBuf(chunk));
-        } catch (Exception e) {
-          error = KvCheckError.MISSING_CHUNK_FILE;
-          handleCorruption("Unable to access chunk path", error, e);
-          return error;
-        }
+        chunkFile = ChunkUtils.getChunkFile(onDiskContainerData,
+            ChunkInfo.getFromProtoBuf(chunk));
 
         if (!chunkFile.exists()) {
-          error = KvCheckError.MISSING_CHUNK_FILE;
-
           // concurrent mutation in Block DB? lookup the block again.
           byte[] bdata = db.get(
               Longs.toByteArray(block.getBlockID().getLocalID()));
           if (bdata == null) {
             LOG.trace("concurrency with delete, ignoring deleted block");
-            error = KvCheckError.ERROR_NONE;
             break; // skip to next block from kvIter
           } else {
-            handleCorruption("Missing chunk file", error, null);
-            return error;
+            String errorStr = "Missing chunk file "
+                + chunkFile.getAbsolutePath();
+            throw new IOException(errorStr);
           }
         }
       }
     }
-
-    return error;
   }
 
-  private KvCheckError loadContainerData() {
-    KvCheckError error = KvCheckError.ERROR_NONE;
+  private void loadContainerData() throws IOException {
 
     File containerFile = KeyValueContainer
         .getContainerFile(metadataPath.toString(), containerID);
 
-    try {
-      onDiskContainerData = (KeyValueContainerData) ContainerDataYaml
-          .readContainerFile(containerFile);
-    } catch (IOException e) {
-      error = KvCheckError.FILE_LOAD;
-      handleCorruption("Unable to load Container File", error, e);
-    }
-
-    return error;
+    onDiskContainerData = (KeyValueContainerData) ContainerDataYaml
+        .readContainerFile(containerFile);
   }
 
-  private KvCheckAction handleCorruption(String reason,
-      KvCheckError error, Exception e) {
-
-    // XXX HDDS-1201 need to implement corruption handling/reporting
-
+  private void handleCorruption(IOException e) {
     String errStr =
-        "Corruption detected in container: [" + containerID + "] reason: ["
-            + reason + "] error code: [" + error + "]";
-    String logMessage = null;
-
-    StackTraceElement[] stackeElems = Thread.currentThread().getStackTrace();
-    String caller =
-        "Corruption reported from Source File: [" + stackeElems[2].getFileName()
-            + "] Line: [" + stackeElems[2].getLineNumber() + "]";
-
-    if (e != null) {
-      logMessage = errStr + " exception: [" + e.getMessage() + "]";
-      e.printStackTrace();
-    } else {
-      logMessage = errStr;
-    }
-
-    LOG.error(caller);
+        "Corruption detected in container: [" + containerID + "] ";
+    String logMessage = errStr + "Exception: [" + e.getMessage() + "]";
     LOG.error(logMessage);
-
-    return KvCheckAction.ABORT;
-  }
-
-  /**
-   * Pre-defined error codes for Container Metadata check.
-   */
-  public enum KvCheckError {
-    ERROR_NONE,
-    FILE_LOAD, // unable to load container metafile
-    METADATA_PATH_ACCESS, // metadata path is not accessible
-    CHUNKS_PATH_ACCESS, // chunks path is not accessible
-    CONTAINERDATA_ID, // bad Container-ID stored in Container file
-    CONTAINERDATA_METADATA_PATH, // bad metadata path in Container file
-    CONTAINERDATA_CHUNKS_PATH, // bad chunks path in Container file
-    CONTAINERDATA_CKSUM, // container file checksum mismatch
-    CONTAINERDATA_TYPE, // container file incorrect type of Container
-    CONTAINERDATA_DBTYPE, // unknown DB Type specified in Container File
-    DB_ACCESS, // unable to load Metastore DB
-    DB_ITERATOR, // unable to create block iterator for Metastore DB
-    MISSING_CHUNK_FILE // chunk file not found
-  }
-
-  private enum KvCheckAction {
-    CONTINUE, // Continue with remaining checks on the corrupt Container
-    ABORT     // Abort checks for the container
   }
 }
\ No newline at end of file
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
index 3395214..0bc1bbc 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
@@ -55,6 +55,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL;
 
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_ROCKSDB;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 /**
@@ -100,7 +101,7 @@ import static org.junit.Assert.assertTrue;
     int deletedBlocks = 1;
     int normalBlocks = 3;
     int chunksPerBlock = 4;
-    KeyValueContainerCheck.KvCheckError error;
+    boolean corruption = false;
 
     // test Closed Container
     createContainerWithBlocks(containerID, normalBlocks, deletedBlocks, 65536,
@@ -114,14 +115,14 @@ import static org.junit.Assert.assertTrue;
             containerID);
 
     // first run checks on a Open Container
-    error = kvCheck.fastCheck();
-    assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE);
+    corruption = kvCheck.fastCheck();
+    assertFalse(corruption);
 
     container.close();
 
     // next run checks on a Closed Container
-    error = kvCheck.fullCheck();
-    assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE);
+    corruption = kvCheck.fullCheck();
+    assertFalse(corruption);
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org