You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cm...@apache.org on 2014/12/09 19:56:53 UTC
hadoop git commit: Incorrect locking in FsVolumeList#checkDirs can
hang datanodes (Noah Lorang via Colin P. McCabe)
Repository: hadoop
Updated Branches:
refs/heads/trunk be86237c0 -> d8352b9b2
Incorrect locking in FsVolumeList#checkDirs can hang datanodes (Noah Lorang via Colin P. McCabe)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d8352b9b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d8352b9b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d8352b9b
Branch: refs/heads/trunk
Commit: d8352b9b2b99aa46679c5880a724ba3f0ceb41ff
Parents: be86237
Author: Colin Patrick Mccabe <cm...@cloudera.com>
Authored: Tue Dec 9 10:55:17 2014 -0800
Committer: Colin Patrick Mccabe <cm...@cloudera.com>
Committed: Tue Dec 9 10:56:46 2014 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../datanode/fsdataset/impl/FsVolumeList.java | 56 ++++++++++----------
2 files changed, 31 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8352b9b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 55026a2..626d90a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -574,6 +574,9 @@ Release 2.6.1 - UNRELEASED
HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in
checkLeases (Ravi Prakash via Colin P. McCabe)
+ HDFS-7489. Incorrect locking in FsVolumeList#checkDirs can hang datanodes
+ (Noah Lorang via Colin P. McCabe)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8352b9b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
index 837ddf7..55329ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
@@ -36,6 +36,7 @@ class FsVolumeList {
* This list is replaced on modification holding "this" lock.
*/
volatile List<FsVolumeImpl> volumes = null;
+ private Object checkDirsMutex = new Object();
private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
private volatile int numFailedVolumes;
@@ -167,40 +168,39 @@ class FsVolumeList {
* Calls {@link FsVolumeImpl#checkDirs()} on each volume, removing any
* volumes from the active list that result in a DiskErrorException.
*
- * This method is synchronized to allow only one instance of checkDirs()
- * call
+ * Use checkDirsMutex to allow only one instance of checkDirs() call
+ *
* @return list of all the removed volumes.
*/
- synchronized List<FsVolumeImpl> checkDirs() {
- ArrayList<FsVolumeImpl> removedVols = null;
-
- // Make a copy of volumes for performing modification
- final List<FsVolumeImpl> volumeList = new ArrayList<FsVolumeImpl>(volumes);
+ List<FsVolumeImpl> checkDirs() {
+ synchronized(checkDirsMutex) {
+ ArrayList<FsVolumeImpl> removedVols = null;
+
+ // Make a copy of volumes for performing modification
+ final List<FsVolumeImpl> volumeList = new ArrayList<FsVolumeImpl>(volumes);
- for(Iterator<FsVolumeImpl> i = volumeList.iterator(); i.hasNext(); ) {
- final FsVolumeImpl fsv = i.next();
- try {
- fsv.checkDirs();
- } catch (DiskErrorException e) {
- FsDatasetImpl.LOG.warn("Removing failed volume " + fsv + ": ",e);
- if (removedVols == null) {
- removedVols = new ArrayList<FsVolumeImpl>(1);
+ for(Iterator<FsVolumeImpl> i = volumeList.iterator(); i.hasNext(); ) {
+ final FsVolumeImpl fsv = i.next();
+ try {
+ fsv.checkDirs();
+ } catch (DiskErrorException e) {
+ FsDatasetImpl.LOG.warn("Removing failed volume " + fsv + ": ",e);
+ if (removedVols == null) {
+ removedVols = new ArrayList<FsVolumeImpl>(1);
+ }
+ removedVols.add(fsv);
+ removeVolume(fsv.getBasePath());
+ numFailedVolumes++;
}
- removedVols.add(fsv);
- fsv.shutdown();
- i.remove(); // Remove the volume
- numFailedVolumes++;
}
- }
-
- if (removedVols != null && removedVols.size() > 0) {
- // Replace volume list
- volumes = Collections.unmodifiableList(volumeList);
- FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size()
- + " volumes. Current volumes: " + this);
- }
+
+ if (removedVols != null && removedVols.size() > 0) {
+ FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size()
+ + " volumes. Current volumes: " + this);
+ }
- return removedVols;
+ return removedVols;
+ }
}
@Override