Posted to common-commits@hadoop.apache.org by ar...@apache.org on 2016/09/11 01:38:08 UTC

[3/3] hadoop git commit: HDFS-10830. FsDatasetImpl#removeVolumes crashes with IllegalMonitorStateException when vol being removed is in use. (Arpit Agarwal and Manoj Govindassamy)

HDFS-10830. FsDatasetImpl#removeVolumes crashes with IllegalMonitorStateException when vol being removed is in use. (Arpit Agarwal and Manoj Govindassamy)
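
Background for the fix: Object.wait() is legal only while the calling
thread holds that object's intrinsic monitor (i.e. inside a
synchronized block on the same object). After the dataset lock was
converted to an AutoCloseableLock backed by a ReentrantLock,
removeVolumes() still called waitVolumeRemoved(5000, this) and ended
up invoking wait() on a monitor it did not own, so the JVM threw
IllegalMonitorStateException whenever a volume being removed was still
in use. A minimal, self-contained sketch of the failure mode (names
are illustrative, not from the patch):

    import java.util.concurrent.locks.ReentrantLock;

    public class MonitorMismatch {
      public static void main(String[] args) throws InterruptedException {
        ReentrantLock lock = new ReentrantLock();
        Object monitor = new Object();
        lock.lock();
        try {
          // Throws IllegalMonitorStateException: this thread holds the
          // ReentrantLock, not the intrinsic monitor of 'monitor',
          // which Object.wait() requires.
          monitor.wait(5000);
        } finally {
          lock.unlock();
        }
      }
    }

The patch therefore replaces the bare-monitor waits with a
java.util.concurrent.locks.Condition created from the lock the caller
actually holds.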


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ed35f990
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ed35f990
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ed35f990

Branch: refs/heads/branch-2.8
Commit: ed35f99045b088996111b1d67e52bb5c0b14f804
Parents: 04f620c
Author: Arpit Agarwal <ar...@apache.org>
Authored: Sat Sep 10 18:34:28 2016 -0700
Committer: Arpit Agarwal <ar...@apache.org>
Committed: Sat Sep 10 18:34:28 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/util/AutoCloseableLock.java |  8 ++++++++
 .../datanode/fsdataset/impl/FsDatasetImpl.java    |  7 ++++++-
 .../datanode/fsdataset/impl/FsVolumeList.java     | 18 ++++++++++++------
 3 files changed, 26 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/ed35f990/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
index d920bc6..d7fe93d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.util;
 
 import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -135,4 +136,11 @@ public class AutoCloseableLock implements AutoCloseable {
     throw new UnsupportedOperationException();
   }
 
+  /**
+   * See {@link ReentrantLock#newCondition()}.
+   * @return the Condition object
+   */
+  public Condition newCondition() {
+    return lock.newCondition();
+  }
 }
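
The new accessor simply delegates to the wrapped ReentrantLock, so the
returned Condition is bound to this AutoCloseableLock and may only be
awaited or signalled while the lock is held. A hedged usage sketch
(isDone() and markDone() are hypothetical placeholders; imports as in
the hunk above plus java.util.concurrent.TimeUnit):

    AutoCloseableLock lock = new AutoCloseableLock();
    Condition cond = lock.newCondition();

    // Waiting side: acquire() must precede await(), otherwise the lock
    // implementation throws IllegalMonitorStateException.
    try (AutoCloseableLock l = lock.acquire()) {
      while (!isDone()) {                         // hypothetical predicate
        cond.await(5000, TimeUnit.MILLISECONDS);  // timed, so it also polls
      }
    }

    // Signalling side, under the same lock:
    try (AutoCloseableLock l = lock.acquire()) {
      markDone();                                 // hypothetical state change
      cond.signalAll();
    }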

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ed35f990/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 1f0d975..a6730cc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -42,6 +42,7 @@ import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.*;
 import java.util.concurrent.Executor;
+import java.util.concurrent.locks.Condition;
 import java.util.concurrent.TimeUnit;
 
 import javax.management.NotCompliantMBeanException;
@@ -271,6 +272,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
   private final int maxDataLength;
 
   private final AutoCloseableLock datasetLock;
+  private final Condition datasetLockCondition;
+  
   /**
    * An FSDataset has a directory where it loads its data files.
    */
@@ -288,6 +291,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
             DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT,
             TimeUnit.MILLISECONDS),
           300));
+    this.datasetLockCondition = datasetLock.newCondition();
+
     // The number of volumes required for operation is the total number
     // of volumes minus the number of failed volumes we can tolerate.
     volFailuresTolerated = datanode.getDnConf().getVolFailuresTolerated();
@@ -519,7 +524,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
           // Disable the volume from the service.
           asyncDiskService.removeVolume(sd.getCurrentDir());
           volumes.removeVolume(absRoot, clearFailure);
-          volumes.waitVolumeRemoved(5000, this);
+          volumes.waitVolumeRemoved(5000, datasetLockCondition);
 
           // Removed all replica information for the blocks on the volume.
           // Unlike updating the volumeMap in addVolume(), this operation does
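
This hunk is the crash site from the JIRA title: removeVolumes()
previously passed 'this' as a bare monitor while the surrounding code
held datasetLock, so the ensuing Object.wait() threw
IllegalMonitorStateException. Passing datasetLockCondition, which was
created from datasetLock itself, keeps the timed wait legal. A
condensed, hypothetical view of the corrected flow (the real method
does considerably more):

    try (AutoCloseableLock lock = datasetLock.acquire()) {
      asyncDiskService.removeVolume(sd.getCurrentDir());
      volumes.removeVolume(absRoot, clearFailure);
      // Await on the Condition belonging to the lock this thread
      // holds, instead of calling wait() on 'this'.
      volumes.waitVolumeRemoved(5000, datasetLockCondition);
    }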

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ed35f990/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
index ea4d597..634ad42 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
@@ -31,6 +31,8 @@ import java.util.Map;
 import java.util.TreeMap;
 import java.util.Set;
 import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Condition;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.StorageType;
@@ -41,6 +43,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy;
 import org.apache.hadoop.hdfs.server.datanode.BlockScanner;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.AutoCloseableLock;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.Time;
 
@@ -52,7 +55,8 @@ class FsVolumeList {
       Collections.synchronizedMap(new TreeMap<String, VolumeFailureInfo>());
   private final ConcurrentLinkedQueue<FsVolumeImpl> volumesBeingRemoved =
       new ConcurrentLinkedQueue<>();
-  private Object checkDirsMutex = new Object();
+  private final AutoCloseableLock checkDirsLock;
+  private final Condition checkDirsLockCondition;
 
   private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
   private final BlockScanner blockScanner;
@@ -62,6 +66,8 @@ class FsVolumeList {
       VolumeChoosingPolicy<FsVolumeImpl> blockChooser) {
     this.blockChooser = blockChooser;
     this.blockScanner = blockScanner;
+    this.checkDirsLock = new AutoCloseableLock();
+    this.checkDirsLockCondition = checkDirsLock.newCondition();
     for (VolumeFailureInfo volumeFailureInfo: initialVolumeFailureInfos) {
       volumeFailureInfos.put(volumeFailureInfo.getFailedStorageLocation(),
           volumeFailureInfo);
@@ -224,12 +230,12 @@ class FsVolumeList {
   /**
    * Calls {@link FsVolumeImpl#checkDirs()} on each volume.
    * 
-   * Use checkDirsMutext to allow only one instance of checkDirs() call
+   * Use {@link checkDirsLock} to allow only one instance of checkDirs() call.
    *
    * @return list of all the failed volumes.
    */
   Set<File> checkDirs() {
-    synchronized(checkDirsMutex) {
+    try (AutoCloseableLock lock = checkDirsLock.acquire()) {
       Set<File> failedVols = null;
       
       // Make a copy of volumes for performing modification 
@@ -260,7 +266,7 @@ class FsVolumeList {
             + " failure volumes.");
       }
 
-      waitVolumeRemoved(5000, checkDirsMutex);
+      waitVolumeRemoved(5000, checkDirsLockCondition);
       return failedVols;
     }
   }
@@ -271,13 +277,13 @@ class FsVolumeList {
    *
    * @param sleepMillis interval to recheck.
    */
-  void waitVolumeRemoved(int sleepMillis, Object monitor) {
+  void waitVolumeRemoved(int sleepMillis, Condition condition) {
     while (!checkVolumesRemoved()) {
       if (FsDatasetImpl.LOG.isDebugEnabled()) {
         FsDatasetImpl.LOG.debug("Waiting for volume reference to be released.");
       }
       try {
-        monitor.wait(sleepMillis);
+        condition.await(sleepMillis, TimeUnit.MILLISECONDS);
       } catch (InterruptedException e) {
         FsDatasetImpl.LOG.info("Thread interrupted when waiting for "
             + "volume reference to be released.");

