Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2018/02/16 22:00:23 UTC

[01/21] hadoop git commit: HADOOP-15204. Add Configuration API for parsing storage sizes. Contributed by Anu Engineer.

Repository: hadoop
Updated Branches:
  refs/heads/HDFS-12996 bddfe42e2 -> 0898ff42e


HADOOP-15204. Add Configuration API for parsing storage sizes. Contributed by Anu Engineer.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8f66affd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8f66affd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8f66affd

Branch: refs/heads/HDFS-12996
Commit: 8f66affd6265c9e4231e18d7ca352fb3035dae9a
Parents: bddfe42
Author: Anu Engineer <ae...@apache.org>
Authored: Wed Feb 14 13:11:37 2018 -0800
Committer: Anu Engineer <ae...@apache.org>
Committed: Wed Feb 14 13:11:37 2018 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/conf/Configuration.java   |  80 +++
 .../org/apache/hadoop/conf/StorageSize.java     | 106 ++++
 .../org/apache/hadoop/conf/StorageUnit.java     | 530 +++++++++++++++++++
 .../apache/hadoop/conf/TestConfiguration.java   |  76 +++
 .../org/apache/hadoop/conf/TestStorageUnit.java | 277 ++++++++++
 5 files changed, 1069 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f66affd/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index fce2194..f8e4638 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -109,6 +109,9 @@ import org.w3c.dom.Element;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
 
+import static org.apache.commons.lang3.StringUtils.isBlank;
+import static org.apache.commons.lang3.StringUtils.isNotBlank;
+
 /**
  * Provides access to configuration parameters.
  *
@@ -1818,6 +1821,83 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     }
     return durations;
   }
+  /**
+   * Gets the storage size from the config, or falls back to the given
+   * default value. The return value is expressed in the target unit.
+   *
+   * @param name - Key Name
+   * @param defaultValue - Default Value -- e.g. 100MB
+   * @param targetUnit - The unit that we want the result to be in.
+   * @return double -- the value converted to the target unit
+   */
+  public double getStorageSize(String name, String defaultValue,
+      StorageUnit targetUnit) {
+    Preconditions.checkState(isNotBlank(name), "Key cannot be blank.");
+    String vString = get(name);
+    if (isBlank(vString)) {
+      vString = defaultValue;
+    }
+
+    // Please note: There is a bit of subtlety here. If the user specifies
+    // the default value as "1GB", but the requested unit is MB, we will return
+    // the result in MB even though the default string is specified in GB.
+
+    // Converts a string like "1GB" to the unit specified in targetUnit.
+
+    StorageSize measure = StorageSize.parse(vString);
+    return convertStorageUnit(measure.getValue(), measure.getUnit(),
+        targetUnit);
+  }
+
+  /**
+   * Gets storage size from a config file.
+   *
+   * @param name - Key to read.
+   * @param defaultValue - The default value to return in case the key is
+   * not present.
+   * @param targetUnit - The Storage unit that should be used
+   * for the return value.
+   * @return - double value in the Storage Unit specified.
+   */
+  public double getStorageSize(String name, double defaultValue,
+      StorageUnit targetUnit) {
+    Preconditions.checkNotNull(targetUnit, "Conversion unit cannot be null.");
+    Preconditions.checkState(isNotBlank(name), "Name cannot be blank.");
+    String vString = get(name);
+    if (isBlank(vString)) {
+      return targetUnit.getDefault(defaultValue);
+    }
+
+    StorageSize measure = StorageSize.parse(vString);
+    return convertStorageUnit(measure.getValue(), measure.getUnit(),
+        targetUnit);
+
+  }
+
+  /**
+   * Sets Storage Size for the specified key.
+   *
+   * @param name - Key to set.
+   * @param value - The numeric value to set.
+   * @param unit - Storage Unit to be used.
+   */
+  public void setStorageSize(String name, double value, StorageUnit unit) {
+    set(name, value + unit.getShortName());
+  }
+
+  /**
+   * Converts the value from one storage unit to another.
+   *
+   * @param value - value
+   * @param sourceUnit - Source unit to convert from
+   * @param targetUnit - target unit.
+   * @return double - the value expressed in the target unit.
+   */
+  private double convertStorageUnit(double value, StorageUnit sourceUnit,
+      StorageUnit targetUnit) {
+    double byteValue = sourceUnit.toBytes(value);
+    return targetUnit.fromBytes(byteValue);
+  }
 
   /**
    * Get the value of the <code>name</code> property as a <code>Pattern</code>.

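As a quick usage sketch of the API added above (the key name "dfs.example.block.size" is invented for illustration; the expected results follow from the 1024-based conversions in StorageUnit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.StorageUnit;

    public class StorageSizeConfigExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);

        // Store 512 MB under a hypothetical key; the stored string is "512.0mb".
        conf.setStorageSize("dfs.example.block.size", 512, StorageUnit.MB);

        // Read it back in gigabytes; "1GB" would only be used if the key were absent.
        double inGBs = conf.getStorageSize("dfs.example.block.size", "1GB",
            StorageUnit.GB);
        System.out.println(inGBs);   // 0.5

        // Missing key: the default string is parsed, then converted to the
        // requested target unit.
        double inKBs = conf.getStorageSize("no.such.key", "2MB", StorageUnit.KB);
        System.out.println(inKBs);   // 2048.0
      }
    }

Note that the unit embedded in the default string never dictates the unit of the return value; the result is always expressed in the requested targetUnit.
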
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f66affd/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageSize.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageSize.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageSize.java
new file mode 100644
index 0000000..6cad6f7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageSize.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ */
+
+package org.apache.hadoop.conf;
+
+import java.util.Locale;
+
+import static org.apache.commons.lang3.StringUtils.isNotBlank;
+
+/**
+ * A class that contains the numeric value and the unit of measure.
+ */
+public class StorageSize {
+  private final StorageUnit unit;
+  private final double value;
+
+  /**
+   * Constructs a Storage Measure, which contains the value and the unit of
+   * measure.
+   *
+   * @param unit - Unit of Measure
+   * @param value - Numeric value.
+   */
+  public StorageSize(StorageUnit unit, double value) {
+    this.unit = unit;
+    this.value = value;
+  }
+
+  private static void checkState(boolean state, String errorString) {
+    if (!state) {
+      throw new IllegalStateException(errorString);
+    }
+  }
+
+  public static StorageSize parse(String value) {
+    checkState(isNotBlank(value), "value cannot be blank");
+    String sanitizedValue = value.trim().toLowerCase(Locale.ENGLISH);
+    StorageUnit parsedUnit = null;
+    for (StorageUnit unit : StorageUnit.values()) {
+      if (sanitizedValue.endsWith(unit.getShortName()) ||
+          sanitizedValue.endsWith(unit.getLongName()) ||
+          sanitizedValue.endsWith(unit.getSuffixChar())) {
+        parsedUnit = unit;
+        break;
+      }
+    }
+
+    if (parsedUnit == null) {
+      throw new IllegalArgumentException(value + " is not in expected format. " +
+          "Expected format is <number><unit>. e.g. 1000MB");
+    }
+
+
+    String suffix = "";
+    boolean found = false;
+
+    // We are trying to get the longest match first, so the order of
+    // matching is getLongName, getShortName and then getSuffixChar.
+    if (!found && sanitizedValue.endsWith(parsedUnit.getLongName())) {
+      found = true;
+      suffix = parsedUnit.getLongName();
+    }
+
+    if (!found && sanitizedValue.endsWith(parsedUnit.getShortName())) {
+      found = true;
+      suffix = parsedUnit.getShortName();
+    }
+
+    if (!found && sanitizedValue.endsWith(parsedUnit.getSuffixChar())) {
+      found = true;
+      suffix = parsedUnit.getSuffixChar();
+    }
+
+    checkState(found, "Something is wrong, we have to find a " +
+        "match. Internal error.");
+
+    String valString =
+        sanitizedValue.substring(0, sanitizedValue.length() - suffix.length());
+    return new StorageSize(parsedUnit, Double.parseDouble(valString));
+
+  }
+
+  public StorageUnit getUnit() {
+    return unit;
+  }
+
+  public double getValue() {
+    return value;
+  }
+
+}

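A short sketch of StorageSize.parse on its own; the input strings are made up for illustration, and the behaviour follows from the suffix matching above:

    import org.apache.hadoop.conf.StorageSize;
    import org.apache.hadoop.conf.StorageUnit;

    public class StorageSizeParseExample {
      public static void main(String[] args) {
        // Short-name match: unit MB, value 128.0.
        StorageSize s1 = StorageSize.parse("128mb");
        System.out.println(s1.getUnit() + " " + s1.getValue());  // megabytes 128.0

        // Long-name match; case is normalized before matching.
        StorageSize s2 = StorageSize.parse("1.5 Terabytes");
        System.out.println(s2.getUnit() + " " + s2.getValue());  // terabytes 1.5

        // No recognizable suffix: throws IllegalArgumentException.
        StorageSize.parse("2048");
      }
    }
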
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f66affd/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageUnit.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageUnit.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageUnit.java
new file mode 100644
index 0000000..fe3c6f8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/StorageUnit.java
@@ -0,0 +1,530 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ */
+
+package org.apache.hadoop.conf;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+
+/**
+ * Class that maintains different forms of Storage Units.
+ */
+public enum StorageUnit {
+  /*
+    We rely on BYTES being declared last so that the longest matching short
+    name wins; the short name of bytes, "b", is a suffix of the other names.
+
+    If we change this order, the matching loop in StorageSize#parse needs to
+    be changed too, since values() returns the enum constants in declaration
+    order and that code depends on it.
+   */
+
+  EB {
+    @Override
+    public double toBytes(double value) {
+      return multiply(value, EXABYTES);
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return multiply(value, EXABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return multiply(value, EXABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return multiply(value, EXABYTES / GIGABYTES);
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return multiply(value, EXABYTES / TERABYTES);
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return multiply(value, EXABYTES / PETABYTES);
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return value;
+    }
+
+    @Override
+    public String getLongName() {
+      return "exabytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "eb";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "e";
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toEBs(value);
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return divide(value, EXABYTES);
+    }
+  },
+  PB {
+    @Override
+    public double toBytes(double value) {
+      return multiply(value, PETABYTES);
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return multiply(value, PETABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return multiply(value, PETABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return multiply(value, PETABYTES / GIGABYTES);
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return multiply(value, PETABYTES / TERABYTES);
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return value;
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return divide(value, EXABYTES / PETABYTES);
+    }
+
+    @Override
+    public String getLongName() {
+      return "petabytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "pb";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "p";
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toPBs(value);
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return divide(value, PETABYTES);
+    }
+  },
+  TB {
+    @Override
+    public double toBytes(double value) {
+      return multiply(value, TERABYTES);
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return multiply(value, TERABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return multiply(value, TERABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return multiply(value, TERABYTES / GIGABYTES);
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return value;
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return divide(value, PETABYTES / TERABYTES);
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return divide(value, EXABYTES / TERABYTES);
+    }
+
+    @Override
+    public String getLongName() {
+      return "terabytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "tb";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "t";
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toTBs(value);
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return divide(value, TERABYTES);
+    }
+  },
+  GB {
+    @Override
+    public double toBytes(double value) {
+      return multiply(value, GIGABYTES);
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return multiply(value, GIGABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return multiply(value, GIGABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return value;
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return divide(value, TERABYTES / GIGABYTES);
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return divide(value, PETABYTES / GIGABYTES);
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return divide(value, EXABYTES / GIGABYTES);
+    }
+
+    @Override
+    public String getLongName() {
+      return "gigabytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "gb";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "g";
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toGBs(value);
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return divide(value, GIGABYTES);
+    }
+  },
+  MB {
+    @Override
+    public double toBytes(double value) {
+      return multiply(value, MEGABYTES);
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return multiply(value, MEGABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return value;
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return divide(value, GIGABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return divide(value, TERABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return divide(value, PETABYTES / MEGABYTES);
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return divide(value, EXABYTES / MEGABYTES);
+    }
+
+    @Override
+    public String getLongName() {
+      return "megabytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "mb";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "m";
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return divide(value, MEGABYTES);
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toMBs(value);
+    }
+  },
+  KB {
+    @Override
+    public double toBytes(double value) {
+      return multiply(value, KILOBYTES);
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return value;
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return divide(value, MEGABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return divide(value, GIGABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return divide(value, TERABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return divide(value, PETABYTES / KILOBYTES);
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return divide(value, EXABYTES / KILOBYTES);
+    }
+
+    @Override
+    public String getLongName() {
+      return "kilobytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "kb";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "k";
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toKBs(value);
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return divide(value, KILOBYTES);
+    }
+  },
+  BYTES {
+    @Override
+    public double toBytes(double value) {
+      return value;
+    }
+
+    @Override
+    public double toKBs(double value) {
+      return divide(value, KILOBYTES);
+    }
+
+    @Override
+    public double toMBs(double value) {
+      return divide(value, MEGABYTES);
+    }
+
+    @Override
+    public double toGBs(double value) {
+      return divide(value, GIGABYTES);
+    }
+
+    @Override
+    public double toTBs(double value) {
+      return divide(value, TERABYTES);
+    }
+
+    @Override
+    public double toPBs(double value) {
+      return divide(value, PETABYTES);
+    }
+
+    @Override
+    public double toEBs(double value) {
+      return divide(value, EXABYTES);
+    }
+
+    @Override
+    public String getLongName() {
+      return "bytes";
+    }
+
+    @Override
+    public String getShortName() {
+      return "b";
+    }
+
+    @Override
+    public String getSuffixChar() {
+      return "b";
+    }
+
+    @Override
+    public double getDefault(double value) {
+      return toBytes(value);
+    }
+
+    @Override
+    public double fromBytes(double value) {
+      return value;
+    }
+  };
+
+  private static final double BYTE = 1L;
+  private static final double KILOBYTES = BYTE * 1024L;
+  private static final double MEGABYTES = KILOBYTES * 1024L;
+  private static final double GIGABYTES = MEGABYTES * 1024L;
+  private static final double TERABYTES = GIGABYTES * 1024L;
+  private static final double PETABYTES = TERABYTES * 1024L;
+  private static final double EXABYTES = PETABYTES * 1024L;
+  private static final int PRECISION = 4;
+
+  /**
+   * Divides using BigDecimal so the result is rounded consistently
+   * (HALF_UP to PRECISION decimal places).
+   * @param value - value
+   * @param divisor - divisor.
+   * @return -- the quotient as a double
+   */
+  private static double divide(double value, double divisor) {
+    BigDecimal val = new BigDecimal(value);
+    BigDecimal bDivisor = new BigDecimal(divisor);
+    return val.divide(bDivisor).setScale(PRECISION, RoundingMode.HALF_UP)
+        .doubleValue();
+  }
+
+  /**
+   * Multiplies using BigDecimal so the result is rounded consistently
+   * (HALF_UP to PRECISION decimal places).
+   * @param first - First Num.
+   * @param second - Second Num.
+   * @return Returns the product as a double
+   */
+  private static double multiply(double first, double second) {
+    BigDecimal firstVal = new BigDecimal(first);
+    BigDecimal secondVal = new BigDecimal(second);
+    return firstVal.multiply(secondVal)
+        .setScale(PRECISION, RoundingMode.HALF_UP).doubleValue();
+  }
+
+  public abstract double toBytes(double value);
+
+  public abstract double toKBs(double value);
+
+  public abstract double toMBs(double value);
+
+  public abstract double toGBs(double value);
+
+  public abstract double toTBs(double value);
+
+  public abstract double toPBs(double value);
+
+  public abstract double toEBs(double value);
+
+  public abstract String getLongName();
+
+  public abstract String getShortName();
+
+  public abstract String getSuffixChar();
+
+  public abstract double getDefault(double value);
+
+  public abstract double fromBytes(double value);
+
+  public String toString() {
+    return getLongName();
+  }
+
+}

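A brief sketch of the conversion and rounding behaviour; results are rounded HALF_UP to the four decimal places fixed by PRECISION, and the 0.4283 figure matches the expectation used in TestStorageUnit below:

    import org.apache.hadoop.conf.StorageUnit;

    public class StorageUnitExample {
      public static void main(String[] args) {
        System.out.println(StorageUnit.GB.toMBs(2));               // 2048.0
        System.out.println(StorageUnit.KB.toBytes(1.5));           // 1536.0
        System.out.println(StorageUnit.BYTES.toGBs(459920023.0));  // 0.4283

        // toBytes/fromBytes round-trip through the raw byte count.
        double bytes = StorageUnit.TB.toBytes(3);                  // 3.298534883328E12
        System.out.println(StorageUnit.TB.fromBytes(bytes));       // 3.0
      }
    }
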
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f66affd/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
index 78dcdd6..24ec4fc 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
@@ -48,7 +48,15 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+
+import static org.apache.hadoop.conf.StorageUnit.BYTES;
+import static org.apache.hadoop.conf.StorageUnit.GB;
+import static org.apache.hadoop.conf.StorageUnit.KB;
+import static org.apache.hadoop.conf.StorageUnit.MB;
+import static org.apache.hadoop.conf.StorageUnit.TB;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.*;
 import static org.junit.Assert.assertArrayEquals;
 
@@ -68,10 +76,13 @@ import org.apache.log4j.AppenderSkeleton;
 import org.apache.log4j.Logger;
 import org.apache.log4j.spi.LoggingEvent;
 import org.hamcrest.CoreMatchers;
+import org.junit.rules.ExpectedException;
 import org.mockito.Mockito;
 
 public class TestConfiguration {
 
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
   private static final double DOUBLE_DELTA = 0.000000001f;
   private Configuration conf;
   final static String CONFIG = new File("./test-config-TestConfiguration.xml").getAbsolutePath();
@@ -1326,6 +1337,71 @@ public class TestConfiguration {
   }
 
   @Test
+  public void testStorageUnit() {
+    final String key = "valid.key";
+    final String nonKey = "not.a.key";
+    Configuration conf = new Configuration(false);
+
+    conf.setStorageSize(key, 10, MB);
+    // This call returns the value specified in the Key as a double in MBs.
+    assertThat(conf.getStorageSize(key, "1GB", MB),
+        is(10.0));
+
+    // Since this key is missing, this call converts the default value of 1GB
+    // to MBs and returns that value.
+    assertThat(conf.getStorageSize(nonKey, "1GB", MB),
+        is(1024.0));
+
+
+    conf.setStorageSize(key, 1024, BYTES);
+    assertThat(conf.getStorageSize(key, 100, KB), is(1.0));
+
+    assertThat(conf.getStorageSize(nonKey, 100.0, KB), is(100.0));
+
+    // We try out different kinds of string formats to see if they work, and
+    // during read we also read them back using different storage units.
+    conf.setStrings(key, "1TB");
+    assertThat(conf.getStorageSize(key, "1PB", GB), is(1024.0));
+
+    conf.setStrings(key, "1bytes");
+    assertThat(conf.getStorageSize(key, "1PB", KB), is(0.001));
+
+    conf.setStrings(key, "2048b");
+    assertThat(conf.getStorageSize(key, "1PB", KB), is(2.0));
+
+    conf.setStrings(key, "64 GB");
+    assertThat(conf.getStorageSize(key, "1PB", GB), is(64.0));
+
+    // Match the parsing patterns of getLongBytes, which takes single char
+    // suffix.
+    conf.setStrings(key, "1T");
+    assertThat(conf.getStorageSize(key, "1GB", TB), is(1.0));
+
+    conf.setStrings(key, "1k");
+    assertThat(conf.getStorageSize(key, "1GB", KB), is(1.0));
+
+    conf.setStrings(key, "10m");
+    assertThat(conf.getStorageSize(key, "1GB", MB), is(10.0));
+
+
+
+    // Missing format specification, this should throw.
+    conf.setStrings(key, "100");
+    thrown.expect(IllegalArgumentException.class);
+    conf.getStorageSize(key, "1PB", GB);
+
+    // Illegal format specification, this should throw.
+    conf.setStrings(key, "1HB");
+    thrown.expect(IllegalArgumentException.class);
+    conf.getStorageSize(key, "1PB", GB);
+
+    // Illegal number specification, this should throw.
+    conf.setStrings(key, "HadoopGB");
+    thrown.expect(IllegalArgumentException.class);
+    conf.getStorageSize(key, "1PB", GB);
+  }
+
+  @Test
   public void testTimeDurationWarning() {
     // check warn for possible loss of precision
     final String warnFormat = "Possible loss of precision converting %s" +

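One caveat on the exception cases at the end of testStorageUnit: the ExpectedException rule finishes the test at the first call that throws, so only the "100" case is actually exercised. A sketch of how each malformed value could be checked independently (the method name is hypothetical; it reuses the static imports already present in this class):

    @Test
    public void testStorageUnitRejectsBadInput() {
      Configuration conf = new Configuration(false);
      final String key = "valid.key";
      for (String bad : new String[] {"100", "1HB", "HadoopGB"}) {
        conf.setStrings(key, bad);
        try {
          conf.getStorageSize(key, "1PB", GB);
          fail("Expected IllegalArgumentException for " + bad);
        } catch (IllegalArgumentException expected) {
          // "100" has no unit suffix; the other two carry no parsable number
          // (NumberFormatException is a subclass of IllegalArgumentException).
        }
      }
    }
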
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f66affd/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestStorageUnit.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestStorageUnit.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestStorageUnit.java
new file mode 100644
index 0000000..e29345d
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestStorageUnit.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ */
+
+package org.apache.hadoop.conf;
+
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+/**
+ * Tests that Storage Units work as expected.
+ */
+public class TestStorageUnit {
+  final static double KB = 1024.0;
+  final static double MB = KB * 1024.0;
+  final static double GB = MB * 1024.0;
+  final static double TB = GB * 1024.0;
+  final static double PB = TB * 1024.0;
+  final static double EB = PB * 1024.0;
+
+  @Test
+  public void testByteToKiloBytes() {
+    Map<Double, Double> results = new HashMap<>();
+    results.put(1024.0, 1.0);
+    results.put(2048.0, 2.0);
+    results.put(-1024.0, -1.0);
+    results.put(34565.0, 33.7549);
+    results.put(223344332.0, 218109.6992);
+    results.put(1234983.0, 1206.0381);
+    results.put(1234332.0, 1205.4023);
+    results.put(0.0, 0.0);
+
+    for (Map.Entry<Double, Double> entry : results.entrySet()) {
+      assertThat(StorageUnit.BYTES.toKBs(entry.getKey()), is(entry.getValue()));
+    }
+  }
+
+  @Test
+  public void testBytesToMegaBytes() {
+    Map<Double, Double> results = new HashMap<>();
+    results.put(1048576.0, 1.0);
+    results.put(24117248.0, 23.0);
+    results.put(459920023.0, 438.6139);
+    results.put(234443233.0, 223.5825);
+    results.put(-35651584.0, -34.0);
+    results.put(0.0, 0.0);
+    for (Map.Entry<Double, Double> entry : results.entrySet()) {
+      assertThat(StorageUnit.BYTES.toMBs(entry.getKey()), is(entry.getValue()));
+    }
+  }
+
+  @Test
+  public void testBytesToGigaBytes() {
+    Map<Double, Double> results = new HashMap<>();
+    results.put(1073741824.0, 1.0);
+    results.put(24696061952.0, 23.0);
+    results.put(459920023.0, 0.4283);
+    results.put(234443233.0, 0.2183);
+    results.put(-36507222016.0, -34.0);
+    results.put(0.0, 0.0);
+    for (Map.Entry<Double, Double> entry : results.entrySet()) {
+      assertThat(StorageUnit.BYTES.toGBs(entry.getKey()), is(entry.getValue()));
+    }
+  }
+
+  @Test
+  public void testBytesToTerraBytes() {
+    Map<Double, Double> results = new HashMap<>();
+    results.put(1.09951E+12, 1.0);
+    results.put(2.52888E+13, 23.0);
+    results.put(459920023.0, 0.0004);
+    results.put(234443233.0, 0.0002);
+    results.put(-3.73834E+13, -34.0);
+    results.put(0.0, 0.0);
+    for (Map.Entry<Double, Double> entry : results.entrySet()) {
+      assertThat(StorageUnit.BYTES.toTBs(entry.getKey()), is(entry.getValue()));
+    }
+  }
+
+  @Test
+  public void testBytesToPetaBytes() {
+    Map<Double, Double> results = new HashMap<>();
+    results.put(1.1259E+15, 1.0);
+    results.put(2.58957E+16, 23.0);
+    results.put(4.70958E+11, 0.0004);
+    results.put(234443233.0, 0.0000); // Out of precision window.
+    results.put(-3.82806E+16, -34.0);
+    results.put(0.0, 0.0);
+    for (Map.Entry<Double, Double> entry : results.entrySet()) {
+      assertThat(StorageUnit.BYTES.toPBs(entry.getKey()), is(entry.getValue()));
+    }
+  }
+
+  @Test
+  public void testBytesToExaBytes() {
+    Map<Double, Double> results = new HashMap<>();
+    results.put(1.15292E+18, 1.0);
+    results.put(2.65172E+19, 23.0);
+    results.put(4.82261E+14, 0.0004);
+    results.put(234443233.0, 0.0000); // Out of precision window.
+    results.put(-3.91993E+19, -34.0);
+    results.put(0.0, 0.0);
+    for (Map.Entry<Double, Double> entry : results.entrySet()) {
+      assertThat(StorageUnit.BYTES.toEBs(entry.getKey()), is(entry.getValue()));
+    }
+  }
+
+  @Test
+  public void testByteConversions() {
+    assertThat(StorageUnit.BYTES.getShortName(), is("b"));
+    assertThat(StorageUnit.BYTES.getSuffixChar(), is("b"));
+
+    assertThat(StorageUnit.BYTES.getLongName(), is("bytes"));
+    assertThat(StorageUnit.BYTES.toString(), is("bytes"));
+    assertThat(StorageUnit.BYTES.toBytes(1), is(1.0));
+    assertThat(StorageUnit.BYTES.toBytes(1024),
+        is(StorageUnit.BYTES.getDefault(1024)));
+    assertThat(StorageUnit.BYTES.fromBytes(10), is(10.0));
+  }
+
+  @Test
+  public void testKBConversions() {
+    assertThat(StorageUnit.KB.getShortName(), is("kb"));
+    assertThat(StorageUnit.KB.getSuffixChar(), is("k"));
+    assertThat(StorageUnit.KB.getLongName(), is("kilobytes"));
+    assertThat(StorageUnit.KB.toString(), is("kilobytes"));
+    assertThat(StorageUnit.KB.toKBs(1024),
+        is(StorageUnit.KB.getDefault(1024)));
+
+
+    assertThat(StorageUnit.KB.toBytes(1), is(KB));
+    assertThat(StorageUnit.KB.fromBytes(KB), is(1.0));
+
+    assertThat(StorageUnit.KB.toKBs(10), is(10.0));
+    assertThat(StorageUnit.KB.toMBs(3.0 * 1024.0), is(3.0));
+    assertThat(StorageUnit.KB.toGBs(1073741824), is(1024.0));
+    assertThat(StorageUnit.KB.toTBs(1073741824), is(1.0));
+    assertThat(StorageUnit.KB.toPBs(1.0995116e+12), is(1.0));
+    assertThat(StorageUnit.KB.toEBs(1.1258999e+15), is(1.0));
+  }
+
+  @Test
+  public void testMBConversions() {
+    assertThat(StorageUnit.MB.getShortName(), is("mb"));
+    assertThat(StorageUnit.MB.getSuffixChar(), is("m"));
+    assertThat(StorageUnit.MB.getLongName(), is("megabytes"));
+    assertThat(StorageUnit.MB.toString(), is("megabytes"));
+    assertThat(StorageUnit.MB.toMBs(1024),
+        is(StorageUnit.MB.getDefault(1024)));
+
+
+
+    assertThat(StorageUnit.MB.toBytes(1), is(MB));
+    assertThat(StorageUnit.MB.fromBytes(MB), is(1.0));
+
+    assertThat(StorageUnit.MB.toKBs(1), is(1024.0));
+    assertThat(StorageUnit.MB.toMBs(10), is(10.0));
+
+    assertThat(StorageUnit.MB.toGBs(44040192), is(43008.0));
+    assertThat(StorageUnit.MB.toTBs(1073741824), is(1024.0));
+    assertThat(StorageUnit.MB.toPBs(1073741824), is(1.0));
+    assertThat(StorageUnit.MB.toEBs(1 * (EB/MB)), is(1.0));
+  }
+
+  @Test
+  public void testGBConversions() {
+    assertThat(StorageUnit.GB.getShortName(), is("gb"));
+    assertThat(StorageUnit.GB.getSuffixChar(), is("g"));
+    assertThat(StorageUnit.GB.getLongName(), is("gigabytes"));
+    assertThat(StorageUnit.GB.toString(), is("gigabytes"));
+    assertThat(StorageUnit.GB.toGBs(1024),
+        is(StorageUnit.GB.getDefault(1024)));
+
+
+    assertThat(StorageUnit.GB.toBytes(1), is(GB));
+    assertThat(StorageUnit.GB.fromBytes(GB), is(1.0));
+
+    assertThat(StorageUnit.GB.toKBs(1), is(1024.0 * 1024));
+    assertThat(StorageUnit.GB.toMBs(10), is(10.0 * 1024));
+
+    assertThat(StorageUnit.GB.toGBs(44040192.0), is(44040192.0));
+    assertThat(StorageUnit.GB.toTBs(1073741824), is(1048576.0));
+    assertThat(StorageUnit.GB.toPBs(1.07375e+9), is(1024.0078));
+    assertThat(StorageUnit.GB.toEBs(1 * (EB/GB)), is(1.0));
+  }
+
+  @Test
+  public void testTBConversions() {
+    assertThat(StorageUnit.TB.getShortName(), is("tb"));
+    assertThat(StorageUnit.TB.getSuffixChar(), is("t"));
+    assertThat(StorageUnit.TB.getLongName(), is("terabytes"));
+    assertThat(StorageUnit.TB.toString(), is("terabytes"));
+    assertThat(StorageUnit.TB.toTBs(1024),
+        is(StorageUnit.TB.getDefault(1024)));
+
+    assertThat(StorageUnit.TB.toBytes(1), is(TB));
+    assertThat(StorageUnit.TB.fromBytes(TB), is(1.0));
+
+    assertThat(StorageUnit.TB.toKBs(1), is(1024.0 * 1024 * 1024));
+    assertThat(StorageUnit.TB.toMBs(10), is(10.0 * 1024 * 1024));
+
+    assertThat(StorageUnit.TB.toGBs(44040192.0), is(45097156608.0));
+    assertThat(StorageUnit.TB.toTBs(1073741824.0), is(1073741824.0));
+    assertThat(StorageUnit.TB.toPBs(1024), is(1.0));
+    assertThat(StorageUnit.TB.toEBs(1 * (EB/TB)), is(1.0));
+  }
+
+  @Test
+  public void testPBConversions() {
+    assertThat(StorageUnit.PB.getShortName(), is("pb"));
+    assertThat(StorageUnit.PB.getSuffixChar(), is("p"));
+    assertThat(StorageUnit.PB.getLongName(), is("petabytes"));
+    assertThat(StorageUnit.PB.toString(), is("petabytes"));
+    assertThat(StorageUnit.PB.toPBs(1024),
+        is(StorageUnit.PB.getDefault(1024)));
+
+
+    assertThat(StorageUnit.PB.toBytes(1), is(PB));
+    assertThat(StorageUnit.PB.fromBytes(PB), is(1.0));
+
+    assertThat(StorageUnit.PB.toKBs(1), is(PB/KB));
+    assertThat(StorageUnit.PB.toMBs(10), is(10.0 * (PB / MB)));
+
+    assertThat(StorageUnit.PB.toGBs(44040192.0),
+        is(44040192.0 * PB/GB));
+    assertThat(StorageUnit.PB.toTBs(1073741824.0),
+        is(1073741824.0 * (PB/TB)));
+    assertThat(StorageUnit.PB.toPBs(1024.0), is(1024.0));
+    assertThat(StorageUnit.PB.toEBs(1024.0), is(1.0));
+  }
+
+
+  @Test
+  public void testEBConversions() {
+    assertThat(StorageUnit.EB.getShortName(), is("eb"));
+    assertThat(StorageUnit.EB.getSuffixChar(), is("e"));
+
+    assertThat(StorageUnit.EB.getLongName(), is("exabytes"));
+    assertThat(StorageUnit.EB.toString(), is("exabytes"));
+    assertThat(StorageUnit.EB.toEBs(1024),
+        is(StorageUnit.EB.getDefault(1024)));
+
+    assertThat(StorageUnit.EB.toBytes(1), is(EB));
+    assertThat(StorageUnit.EB.fromBytes(EB), is(1.0));
+
+    assertThat(StorageUnit.EB.toKBs(1), is(EB/KB));
+    assertThat(StorageUnit.EB.toMBs(10), is(10.0 * (EB / MB)));
+
+    assertThat(StorageUnit.EB.toGBs(44040192.0),
+        is(44040192.0 * EB/GB));
+    assertThat(StorageUnit.EB.toTBs(1073741824.0),
+        is(1073741824.0 * (EB/TB)));
+    assertThat(StorageUnit.EB.toPBs(1.0), is(1024.0));
+    assertThat(StorageUnit.EB.toEBs(42.0), is(42.0));
+  }
+
+
+}




[16/21] hadoop git commit: HDFS-13151. Fix the javadoc error in ReplicaInfo

Posted by ha...@apache.org.
HDFS-13151. Fix the javadoc error in ReplicaInfo

Signed-off-by: Akira Ajisaka <aa...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a1e05e02
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a1e05e02
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a1e05e02

Branch: refs/heads/HDFS-12996
Commit: a1e05e02927f29e5598fdc665ac997667e6b00b1
Parents: aae6299
Author: Bharat Viswanadham <bv...@hortonworks.com>
Authored: Fri Feb 16 16:22:24 2018 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Fri Feb 16 16:22:24 2018 +0900

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java    | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1e05e02/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
index 3718799..4acf236 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInfo.java
@@ -53,7 +53,6 @@ abstract public class ReplicaInfo extends Block
    * Constructor.
    * @param block a block
    * @param vol volume where replica is located
-   * @param dir directory path where block and meta files are located
    */
   ReplicaInfo(Block block, FsVolumeSpi vol) {
     this(vol, block.getBlockId(), block.getNumBytes(),




[06/21] hadoop git commit: HADOOP-15176. Enhance IAM Assumed Role support in S3A client. Contributed by Steve Loughran

Posted by ha...@apache.org.
HADOOP-15176. Enhance IAM Assumed Role support in S3A client.
Contributed by Steve Loughran

(cherry picked from commit 96c047fbb98c2378eed9693a724d4cbbd03c00fd)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9a013b25
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9a013b25
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9a013b25

Branch: refs/heads/HDFS-12996
Commit: 9a013b255f301c557c3868dc1ad657202e9e7a67
Parents: b27ab7d
Author: Steve Loughran <st...@apache.org>
Authored: Thu Feb 15 15:56:10 2018 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Feb 15 15:57:10 2018 +0000

----------------------------------------------------------------------
 .../apache/hadoop/util/JsonSerialization.java   |   8 +
 .../src/main/resources/core-default.xml         |  13 +-
 .../org/apache/hadoop/test/LambdaTestUtils.java |  40 +-
 .../apache/hadoop/test/TestLambdaTestUtils.java |  36 +
 .../fs/s3a/AssumedRoleCredentialProvider.java   | 197 -----
 .../org/apache/hadoop/fs/s3a/Constants.java     |   2 +-
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java |  17 +-
 .../java/org/apache/hadoop/fs/s3a/S3AUtils.java |  53 +-
 .../s3a/auth/AssumedRoleCredentialProvider.java | 205 +++++
 .../apache/hadoop/fs/s3a/auth/RoleModel.java    | 314 ++++++++
 .../apache/hadoop/fs/s3a/auth/RolePolicies.java | 228 ++++++
 .../apache/hadoop/fs/s3a/auth/package-info.java |  27 +
 .../hadoop/fs/s3a/commit/CommitOperations.java  |   2 +-
 .../markdown/tools/hadoop-aws/assumed_roles.md  | 274 ++++++-
 .../site/markdown/tools/hadoop-aws/testing.md   |  15 +-
 .../s3a/ITestS3AContractDistCpAssumedRole.java  |  52 --
 .../apache/hadoop/fs/s3a/ITestAssumeRole.java   | 324 --------
 .../org/apache/hadoop/fs/s3a/S3ATestUtils.java  |  46 +-
 .../fs/s3a/TestS3AAWSCredentialsProvider.java   |  40 +-
 .../hadoop/fs/s3a/auth/ITestAssumeRole.java     | 789 +++++++++++++++++++
 .../auth/ITestAssumedRoleCommitOperations.java  | 130 +++
 .../hadoop/fs/s3a/auth/RoleTestUtils.java       | 171 ++++
 .../fs/s3a/commit/AbstractCommitITest.java      |  12 +-
 .../fs/s3a/commit/ITestCommitOperations.java    |   4 +-
 24 files changed, 2345 insertions(+), 654 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
index 15f4fef..86c4df6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
@@ -90,6 +90,14 @@ public class JsonSerialization<T> {
   }
 
   /**
+   * Get the mapper of this class.
+   * @return the mapper
+   */
+  public ObjectMapper getMapper() {
+    return mapper;
+  }
+
+  /**
    * Convert from JSON.
    *
    * @param json input

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index ede1f1c..ece54c4 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -977,20 +977,21 @@
 </property>
 
 <property>
-  <name>fs.s3a.assumed.role.session.duration</name>
-  <value>30m</value>
+  <name>fs.s3a.assumed.role.policy</name>
+  <value/>
   <description>
-    Duration of assumed roles before a refresh is attempted.
+    JSON policy to apply to the role.
     Only used if AssumedRoleCredentialProvider is the AWS credential provider.
   </description>
 </property>
 
 <property>
-  <name>fs.s3a.assumed.role.policy</name>
-  <value/>
+  <name>fs.s3a.assumed.role.session.duration</name>
+  <value>30m</value>
   <description>
-    JSON policy containing more restrictions to apply to the role.
+    Duration of assumed roles before a refresh is attempted.
     Only used if AssumedRoleCredentialProvider is the AWS credential provider.
+    Range: 15m to 1h
   </description>
 </property>
 

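For context, a small sketch of how these two properties are consumed; it mirrors the getTimeDuration and getTrimmed calls visible in the (relocated) AssumedRoleCredentialProvider further down, with the 30-minute default written out as a literal purely for illustration:

    import java.util.concurrent.TimeUnit;

    import org.apache.hadoop.conf.Configuration;

    public class AssumedRoleConfigExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        // "30m"-style values are parsed by getTimeDuration.
        long sessionSeconds = conf.getTimeDuration(
            "fs.s3a.assumed.role.session.duration", 30 * 60, TimeUnit.SECONDS);

        // An empty value means "no scope-down policy".
        String policy = conf.getTrimmed("fs.s3a.assumed.role.policy", "");

        System.out.println(sessionSeconds + "s, policy set: " + !policy.isEmpty());
      }
    }
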
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
index 22208f7..cbb5288 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
@@ -604,8 +604,44 @@ public final class LambdaTestUtils {
   public static <T> void assertOptionalUnset(String message,
       Optional<T> actual) {
     Assert.assertNotNull(message, actual);
-    if (actual.isPresent()) {
-      Assert.fail("Expected empty option, got " + actual.get().toString());
+    actual.ifPresent(
+        t -> Assert.fail("Expected empty option, got " + t.toString()));
+  }
+
+  /**
+   * Invoke a callable; wrap all checked exceptions with an
+   * AssertionError.
+   * @param closure closure to execute
+   * @param <T> return type of closure
+   * @return the value of the closure
+   * @throws AssertionError if the operation raised an IOE or
+   * other checked exception.
+   */
+  public static <T> T eval(Callable<T> closure) {
+    try {
+      return closure.call();
+    } catch (RuntimeException e) {
+      throw e;
+    } catch (Exception e) {
+      throw new AssertionError(e.toString(), e);
+    }
+  }
+
+  /**
+   * Invoke a void callable; wrap all checked exceptions with an
+   * AssertionError. Unlike the generic overload, nothing is returned.
+   *
+   * @param closure closure to execute
+   * @throws AssertionError if the operation raised an IOE or
+   * other checked exception.
+   */
+  public static void eval(VoidCallable closure) {
+    try {
+      closure.call();
+    } catch (RuntimeException e) {
+      throw e;
+    } catch (Exception e) {
+      throw new AssertionError(e.toString(), e);
     }
   }
 

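A minimal sketch of what the new eval() overloads are for: calling IOException-throwing code from places that cannot declare checked exceptions, such as lambdas inside assertions. The local-filesystem calls are purely illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    import static org.apache.hadoop.test.LambdaTestUtils.eval;
    import static org.junit.Assert.assertTrue;

    public class EvalExample {
      public static void main(String[] args) {
        // getLocal() declares IOException; eval() rethrows it as an AssertionError.
        FileSystem fs = eval(() -> FileSystem.getLocal(new Configuration()));

        assertTrue("root should exist", eval(() -> fs.exists(new Path("/"))));

        // The void overload suits calls that return nothing, e.g. close().
        eval(() -> fs.close());
      }
    }
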
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
index c790a18..694fe73 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
@@ -493,4 +493,40 @@ public class TestLambdaTestUtils extends Assert {
     assertMinRetryCount(0);
   }
 
+  @Test
+  public void testEvalToSuccess() {
+    assertTrue("Eval to success", eval(() -> true));
+  }
+
+  /**
+   * There's no attempt to wrap an unchecked exception
+   * with an AssertionError.
+   */
+  @Test
+  public void testEvalDoesntWrapRTEs() throws Throwable {
+    intercept(RuntimeException.class, "",
+        () -> eval(() -> {
+          throw new RuntimeException("t");
+        }));
+  }
+
+  /**
+   * Verify that IOEs are caught and wrapped, and that the
+   * inner cause is the original IOE.
+   */
+  @Test
+  public void testEvalDoesWrapIOEs() throws Throwable {
+    AssertionError ex = intercept(AssertionError.class, "ioe",
+        () -> eval(() -> {
+          throw new IOException("ioe");
+        }));
+    Throwable cause = ex.getCause();
+    if (cause == null) {
+      throw ex;
+    }
+    if (!(cause instanceof IOException)) {
+      throw cause;
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AssumedRoleCredentialProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AssumedRoleCredentialProvider.java
deleted file mode 100644
index 26f1f4e..0000000
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AssumedRoleCredentialProvider.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.s3a;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.Locale;
-import java.util.concurrent.TimeUnit;
-
-import com.amazonaws.auth.AWSCredentials;
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
-import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException;
-import com.google.common.annotations.VisibleForTesting;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.security.UserGroupInformation;
-
-import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProvider;
-import static org.apache.hadoop.fs.s3a.S3AUtils.loadAWSProviderClasses;
-
-/**
- * Support IAM Assumed roles by instantiating an instance of
- * {@code STSAssumeRoleSessionCredentialsProvider} from configuration
- * properties, including wiring up the inner authenticator, and,
- * unless overridden, creating a session name from the current user.
- */
-public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
-    Closeable {
-
-  private static final Logger LOG =
-      LoggerFactory.getLogger(AssumedRoleCredentialProvider.class);
-  public static final String NAME
-      = "org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider";
-
-  static final String E_FORBIDDEN_PROVIDER =
-      "AssumedRoleCredentialProvider cannot be in "
-          + ASSUMED_ROLE_CREDENTIALS_PROVIDER;
-
-  public static final String E_NO_ROLE = "Unset property "
-      + ASSUMED_ROLE_ARN;
-
-  private final STSAssumeRoleSessionCredentialsProvider stsProvider;
-
-  private final String sessionName;
-
-  private final long duration;
-
-  private final String arn;
-
-  /**
-   * Instantiate.
-   * This calls {@link #getCredentials()} to fail fast on the inner
-   * role credential retrieval.
-   * @param conf configuration
-   * @throws IOException on IO problems and some parameter checking
-   * @throws IllegalArgumentException invalid parameters
-   * @throws AWSSecurityTokenServiceException problems getting credentials
-   */
-  public AssumedRoleCredentialProvider(Configuration conf) throws IOException {
-
-    arn = conf.getTrimmed(ASSUMED_ROLE_ARN, "");
-    if (StringUtils.isEmpty(arn)) {
-      throw new IOException(E_NO_ROLE);
-    }
-
-    // build up the base provider
-    Class<?>[] awsClasses = loadAWSProviderClasses(conf,
-        ASSUMED_ROLE_CREDENTIALS_PROVIDER,
-        SimpleAWSCredentialsProvider.class);
-    AWSCredentialProviderList credentials = new AWSCredentialProviderList();
-    for (Class<?> aClass : awsClasses) {
-      if (this.getClass().equals(aClass)) {
-        throw new IOException(E_FORBIDDEN_PROVIDER);
-      }
-      credentials.add(createAWSCredentialProvider(conf, aClass));
-    }
-
-    // then the STS binding
-    sessionName = conf.getTrimmed(ASSUMED_ROLE_SESSION_NAME,
-        buildSessionName());
-    duration = conf.getTimeDuration(ASSUMED_ROLE_SESSION_DURATION,
-        ASSUMED_ROLE_SESSION_DURATION_DEFAULT, TimeUnit.SECONDS);
-    String policy = conf.getTrimmed(ASSUMED_ROLE_POLICY, "");
-
-    LOG.info("{}", this);
-    STSAssumeRoleSessionCredentialsProvider.Builder builder
-        = new STSAssumeRoleSessionCredentialsProvider.Builder(arn, sessionName);
-    builder.withRoleSessionDurationSeconds((int) duration);
-    if (StringUtils.isNotEmpty(policy)) {
-      LOG.debug("Scope down policy {}", policy);
-      builder.withScopeDownPolicy(policy);
-    }
-    String epr = conf.get(ASSUMED_ROLE_STS_ENDPOINT, "");
-    if (StringUtils.isNotEmpty(epr)) {
-      LOG.debug("STS Endpoint: {}", epr);
-      builder.withServiceEndpoint(epr);
-    }
-    LOG.debug("Credentials to obtain role credentials: {}", credentials);
-    builder.withLongLivedCredentialsProvider(credentials);
-    stsProvider = builder.build();
-    // and force in a fail-fast check just to keep the stack traces less
-    // convoluted
-    getCredentials();
-  }
-
-  /**
-   * Get credentials.
-   * @return the credentials
-   * @throws AWSSecurityTokenServiceException if none could be obtained.
-   */
-  @Override
-  public AWSCredentials getCredentials() {
-    try {
-      return stsProvider.getCredentials();
-    } catch (AWSSecurityTokenServiceException e) {
-      LOG.error("Failed to get credentials for role {}",
-          arn, e);
-      throw e;
-    }
-  }
-
-  @Override
-  public void refresh() {
-    stsProvider.refresh();
-  }
-
-  /**
-   * Propagate the close() call to the inner stsProvider.
-   */
-  @Override
-  public void close() {
-    stsProvider.close();
-  }
-
-  @Override
-  public String toString() {
-    final StringBuilder sb = new StringBuilder(
-        "AssumedRoleCredentialProvider{");
-    sb.append("role='").append(arn).append('\'');
-    sb.append(", session'").append(sessionName).append('\'');
-    sb.append(", duration=").append(duration);
-    sb.append('}');
-    return sb.toString();
-  }
-
-  /**
-   * Build the session name from the current user's shortname.
-   * @return a string for the session name.
-   * @throws IOException failure to get the current user
-   */
-  static String buildSessionName() throws IOException {
-    return sanitize(UserGroupInformation.getCurrentUser()
-        .getShortUserName());
-  }
-
-  /**
-   * Build a session name from the string, sanitizing it for the permitted
-   * characters.
-   * @param session source session
-   * @return a string for use in role requests.
-   */
-  @VisibleForTesting
-  static String sanitize(String session) {
-    StringBuilder r = new StringBuilder(session.length());
-    for (char c: session.toCharArray()) {
-      if ("abcdefghijklmnopqrstuvwxyz0123456789,.@-".contains(
-          Character.toString(c).toLowerCase(Locale.ENGLISH))) {
-        r.append(c);
-      } else {
-        r.append('-');
-      }
-    }
-    return r.toString();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 38aaeaa..faec784 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -94,7 +94,7 @@ public final class Constants {
   public static final String ASSUMED_ROLE_CREDENTIALS_PROVIDER =
       "fs.s3a.assumed.role.credentials.provider";
 
-  /** JSON policy containing more restrictions to apply to the role. */
+  /** JSON policy containing the policy to apply to the role. */
   public static final String ASSUMED_ROLE_POLICY =
       "fs.s3a.assumed.role.policy";
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index fced494..da8f38b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -1399,9 +1399,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
     } catch (MultiObjectDeleteException e) {
       // one or more of the operations failed.
       List<MultiObjectDeleteException.DeleteError> errors = e.getErrors();
-      LOG.error("Partial failure of delete, {} errors", errors.size(), e);
+      LOG.debug("Partial failure of delete, {} errors", errors.size(), e);
       for (MultiObjectDeleteException.DeleteError error : errors) {
-        LOG.error("{}: \"{}\" - {}",
+        LOG.debug("{}: \"{}\" - {}",
             error.getKey(), error.getCode(), error.getMessage());
       }
       throw e;
@@ -1649,7 +1649,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
       blockRootDelete(keyVersion.getKey());
     }
     if (enableMultiObjectsDelete) {
-      deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
+      deleteObjects(new DeleteObjectsRequest(bucket)
+          .withKeys(keysToDelete)
+          .withQuiet(true));
     } else {
       for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
         deleteObject(keyVersion.getKey());
@@ -1684,7 +1686,13 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
       entryPoint(INVOCATION_DELETE);
       boolean outcome = innerDelete(innerGetFileStatus(f, true), recursive);
       if (outcome) {
-        maybeCreateFakeParentDirectory(f);
+        try {
+          maybeCreateFakeParentDirectory(f);
+        } catch (AccessDeniedException e) {
+          LOG.warn("Cannot create directory marker at {}: {}",
+              f.getParent(), e.toString());
+          LOG.debug("Failed to create fake dir above {}", f, e);
+        }
       }
       return outcome;
     } catch (FileNotFoundException e) {
@@ -1827,6 +1835,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
    * @throws IOException IO problem
    * @throws AmazonClientException untranslated AWS client problem
    */
+  @Retries.RetryTranslated
   void maybeCreateFakeParentDirectory(Path path)
       throws IOException, AmazonClientException {
     Path parent = path.getParent();

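For illustration only (not part of the patch above): a minimal, hypothetical sketch of what the `delete()` change means for a caller. With this change, an AccessDeniedException raised while recreating the fake parent directory marker is logged as a warning instead of failing the whole delete. Bucket and path names below are invented.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteUnderReadOnlyParent {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical layout: the assumed role can write under /users/alice
    // but has read-only access to /users itself.
    Path file = new Path("s3a://example-bucket/users/alice/data.csv");
    try (FileSystem fs = FileSystem.get(file.toUri(), conf)) {
      // After this patch, failure to recreate the parent directory marker
      // is logged; the delete outcome is still returned to the caller.
      boolean deleted = fs.delete(file, false);
      System.out.println("deleted = " + deleted);
    }
  }
}
```
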
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 4dd6ed1..2b64a76 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -30,6 +30,7 @@ import com.amazonaws.services.dynamodbv2.model.LimitExceededException;
 import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException;
 import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
 import com.amazonaws.services.s3.model.AmazonS3Exception;
+import com.amazonaws.services.s3.model.MultiObjectDeleteException;
 import com.amazonaws.services.s3.model.S3ObjectSummary;
 
 import com.google.common.base.Preconditions;
@@ -248,6 +249,14 @@ public final class S3AUtils {
         ioe = new AWSStatus500Exception(message, ase);
         break;
 
+      case 200:
+        if (exception instanceof MultiObjectDeleteException) {
+          // failure during a bulk delete
+          return translateMultiObjectDeleteException(message,
+              (MultiObjectDeleteException) exception);
+        }
+        // other 200: FALL THROUGH
+
       default:
         // no specific exit code. Choose an IOE subclass based on the class
         // of the caught exception
@@ -379,6 +388,40 @@ public final class S3AUtils {
   }
 
   /**
+   * A MultiObjectDeleteException is raised if one or more delete objects
+   * listed in a bulk DELETE operation failed.
+   * The top-level exception is therefore just "something wasn't deleted",
+   * but doesn't include the what or the why.
+   * This translation will extract an AccessDeniedException if that's one of
+   * the causes, otherwise grabs the status code and uses it in the
+   * returned exception.
+   * @param message text for the exception
+   * @param ex exception to translate
+   * @return an IOE with more detail.
+   */
+  public static IOException translateMultiObjectDeleteException(String message,
+      MultiObjectDeleteException ex) {
+    List<String> keys;
+    StringBuffer result = new StringBuffer(ex.getErrors().size() * 100);
+    result.append(message).append(": ");
+    String exitCode = "";
+    for (MultiObjectDeleteException.DeleteError error : ex.getErrors()) {
+      String code = error.getCode();
+      result.append(String.format("%s: %s: %s%n", code, error.getKey(),
+          error.getMessage()));
+      if (exitCode.isEmpty() ||  "AccessDenied".equals(code)) {
+        exitCode = code;
+      }
+    }
+    if ("AccessDenied".equals(exitCode)) {
+      return (IOException) new AccessDeniedException(result.toString())
+          .initCause(ex);
+    } else {
+      return new AWSS3IOException(result.toString(), ex);
+    }
+  }
+
+  /**
    * Get low level details of an amazon exception for logging; multi-line.
    * @param e exception
    * @return string details
@@ -534,7 +577,7 @@ public final class S3AUtils {
    * @return the list of classes, possibly empty
    * @throws IOException on a failure to load the list.
    */
-  static Class<?>[] loadAWSProviderClasses(Configuration conf,
+  public static Class<?>[] loadAWSProviderClasses(Configuration conf,
       String key,
       Class<?>... defaultValue) throws IOException {
     try {
@@ -564,7 +607,7 @@ public final class S3AUtils {
    * @return the instantiated class
    * @throws IOException on any instantiation failure.
    */
-  static AWSCredentialsProvider createAWSCredentialProvider(
+  public static AWSCredentialsProvider createAWSCredentialProvider(
       Configuration conf, Class<?> credClass) throws IOException {
     AWSCredentialsProvider credentials;
     String className = credClass.getName();
@@ -973,14 +1016,18 @@ public final class S3AUtils {
    * iterator.
    * @param iterator iterator from a list
    * @param eval closure to evaluate
+   * @return the number of files processed
    * @throws IOException anything in the closure, or iteration logic.
    */
-  public static void applyLocatedFiles(
+  public static long applyLocatedFiles(
       RemoteIterator<LocatedFileStatus> iterator,
       CallOnLocatedFileStatus eval) throws IOException {
+    long count = 0;
     while (iterator.hasNext()) {
+      count++;
       eval.call(iterator.next());
     }
+    return count;
   }
 
   /**

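As a usage note on the `applyLocatedFiles` change above, which now returns the number of entries processed: a hedged sketch, assuming `CallOnLocatedFileStatus` remains a single-method callback (as its use with closures suggests) so a lambda can be supplied. The bucket and path are invented.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AUtils;

public class CountLocatedFiles {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path("s3a://example-bucket/data/");  // hypothetical path
    try (FileSystem fs = FileSystem.get(dir.toUri(), conf)) {
      // The closure is applied to every located file; the return value is
      // the number of files seen, which callers can now use for reporting.
      long count = S3AUtils.applyLocatedFiles(
          fs.listFiles(dir, true),
          status -> System.out.println(status.getPath()));
      System.out.println("processed " + count + " files");
    }
  }
}
```
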
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
new file mode 100644
index 0000000..42809c8
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Locale;
+import java.util.concurrent.TimeUnit;
+
+import com.amazonaws.auth.AWSCredentials;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
+import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException;
+import com.google.common.annotations.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProvider;
+import static org.apache.hadoop.fs.s3a.S3AUtils.loadAWSProviderClasses;
+
+/**
+ * Support IAM Assumed roles by instantiating an instance of
+ * {@code STSAssumeRoleSessionCredentialsProvider} from configuration
+ * properties, including wiring up the inner authenticator, and,
+ * unless overridden, creating a session name from the current user.
+ *
+ * Classname is used in configuration files; do not move.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
+    Closeable {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(AssumedRoleCredentialProvider.class);
+  public static final String NAME
+      = "org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider";
+
+  static final String E_FORBIDDEN_PROVIDER =
+      "AssumedRoleCredentialProvider cannot be in "
+          + ASSUMED_ROLE_CREDENTIALS_PROVIDER;
+
+  public static final String E_NO_ROLE = "Unset property "
+      + ASSUMED_ROLE_ARN;
+
+  private final STSAssumeRoleSessionCredentialsProvider stsProvider;
+
+  private final String sessionName;
+
+  private final long duration;
+
+  private final String arn;
+
+  /**
+   * Instantiate.
+   * This calls {@link #getCredentials()} to fail fast on the inner
+   * role credential retrieval.
+   * @param conf configuration
+   * @throws IOException on IO problems and some parameter checking
+   * @throws IllegalArgumentException invalid parameters
+   * @throws AWSSecurityTokenServiceException problems getting credentials
+   */
+  public AssumedRoleCredentialProvider(Configuration conf) throws IOException {
+
+    arn = conf.getTrimmed(ASSUMED_ROLE_ARN, "");
+    if (StringUtils.isEmpty(arn)) {
+      throw new IOException(E_NO_ROLE);
+    }
+
+    // build up the base provider
+    Class<?>[] awsClasses = loadAWSProviderClasses(conf,
+        ASSUMED_ROLE_CREDENTIALS_PROVIDER,
+        SimpleAWSCredentialsProvider.class);
+    AWSCredentialProviderList credentials = new AWSCredentialProviderList();
+    for (Class<?> aClass : awsClasses) {
+      if (this.getClass().equals(aClass)) {
+        throw new IOException(E_FORBIDDEN_PROVIDER);
+      }
+      credentials.add(createAWSCredentialProvider(conf, aClass));
+    }
+
+    // then the STS binding
+    sessionName = conf.getTrimmed(ASSUMED_ROLE_SESSION_NAME,
+        buildSessionName());
+    duration = conf.getTimeDuration(ASSUMED_ROLE_SESSION_DURATION,
+        ASSUMED_ROLE_SESSION_DURATION_DEFAULT, TimeUnit.SECONDS);
+    String policy = conf.getTrimmed(ASSUMED_ROLE_POLICY, "");
+
+    LOG.debug("{}", this);
+    STSAssumeRoleSessionCredentialsProvider.Builder builder
+        = new STSAssumeRoleSessionCredentialsProvider.Builder(arn, sessionName);
+    builder.withRoleSessionDurationSeconds((int) duration);
+    if (StringUtils.isNotEmpty(policy)) {
+      LOG.debug("Scope down policy {}", policy);
+      builder.withScopeDownPolicy(policy);
+    }
+    String epr = conf.get(ASSUMED_ROLE_STS_ENDPOINT, "");
+    if (StringUtils.isNotEmpty(epr)) {
+      LOG.debug("STS Endpoint: {}", epr);
+      builder.withServiceEndpoint(epr);
+    }
+    LOG.debug("Credentials to obtain role credentials: {}", credentials);
+    builder.withLongLivedCredentialsProvider(credentials);
+    stsProvider = builder.build();
+    // and force in a fail-fast check just to keep the stack traces less
+    // convoluted
+    getCredentials();
+  }
+
+  /**
+   * Get credentials.
+   * @return the credentials
+   * @throws AWSSecurityTokenServiceException if none could be obtained.
+   */
+  @Override
+  public AWSCredentials getCredentials() {
+    try {
+      return stsProvider.getCredentials();
+    } catch (AWSSecurityTokenServiceException e) {
+      LOG.error("Failed to get credentials for role {}",
+          arn, e);
+      throw e;
+    }
+  }
+
+  @Override
+  public void refresh() {
+    stsProvider.refresh();
+  }
+
+  /**
+   * Propagate the close() call to the inner stsProvider.
+   */
+  @Override
+  public void close() {
+    stsProvider.close();
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder(
+        "AssumedRoleCredentialProvider{");
+    sb.append("role='").append(arn).append('\'');
+    sb.append(", session'").append(sessionName).append('\'');
+    sb.append(", duration=").append(duration);
+    sb.append('}');
+    return sb.toString();
+  }
+
+  /**
+   * Build the session name from the current user's shortname.
+   * @return a string for the session name.
+   * @throws IOException failure to get the current user
+   */
+  static String buildSessionName() throws IOException {
+    return sanitize(UserGroupInformation.getCurrentUser()
+        .getShortUserName());
+  }
+
+  /**
+   * Build a session name from the string, sanitizing it for the permitted
+   * characters.
+   * @param session source session
+   * @return a string for use in role requests.
+   */
+  @VisibleForTesting
+  static String sanitize(String session) {
+    StringBuilder r = new StringBuilder(session.length());
+    for (char c: session.toCharArray()) {
+      if ("abcdefghijklmnopqrstuvwxyz0123456789,.@-".contains(
+          Character.toString(c).toLowerCase(Locale.ENGLISH))) {
+        r.append(c);
+      } else {
+        r.append('-');
+      }
+    }
+    return r.toString();
+  }
+
+}

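A minimal sketch of exercising the new provider directly from client code, with a hypothetical role ARN; in normal use the class is simply named in `fs.s3a.aws.credentials.provider` and instantiated by S3A itself.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider;

public class AssumedRoleSetup {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical ARN; substitute the role created in your IAM account.
    conf.set("fs.s3a.assumed.role.arn",
        "arn:aws:iam::123456789012:role/s3-restricted");
    conf.set("fs.s3a.assumed.role.session.name", "example-session");
    conf.set("fs.s3a.assumed.role.session.duration", "30m");
    // Long-lived credentials used to authenticate with STS; here the simple
    // provider reading fs.s3a.access.key / fs.s3a.secret.key.
    conf.set("fs.s3a.assumed.role.credentials.provider",
        "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider");

    // The constructor performs a fail-fast getCredentials() call, so any
    // STS or policy problem surfaces here rather than mid-job.
    try (AssumedRoleCredentialProvider provider =
             new AssumedRoleCredentialProvider(conf)) {
      System.out.println(provider);
    }
  }
}
```
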
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java
new file mode 100644
index 0000000..ca2c993
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.JsonSerialization;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Jackson Role Model for Role Properties, for API clients and tests.
+ *
+ * Doesn't have complete coverage of the entire AWS IAM policy model;
+ * don't expect to be able to parse everything.
+ * It can generate simple models.
+ * @see <a href="https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-arn-format.html">Example S3 Policies</a>
+ * @see <a href="https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/api-permissions-reference.html">Dynamno DB Permissions</a>
+ */
+@InterfaceAudience.LimitedPrivate("Tests")
+@InterfaceStability.Unstable
+public class RoleModel {
+
+  public static final String VERSION = "2012-10-17";
+
+  public static final String BUCKET_RESOURCE_F = "arn:aws:s3:::%s/%s";
+
+
+  private static final AtomicLong SID_COUNTER = new AtomicLong(0);
+
+
+  private final JsonSerialization<Policy> serialization =
+      new JsonSerialization<>(Policy.class, false, true);
+
+  public RoleModel() {
+    ObjectMapper mapper = serialization.getMapper();
+    mapper.enable(SerializationFeature.WRITE_SINGLE_ELEM_ARRAYS_UNWRAPPED);
+  }
+
+  public String toJson(Policy policy) throws JsonProcessingException {
+    return serialization.toJson(policy);
+  }
+
+  /**
+   * Statement ID factory.
+   * @return a statement ID unique for this JVM's life.
+   */
+  public static String newSid() {
+    // increment and read in one step so concurrent callers get unique IDs
+    return Long.toString(SID_COUNTER.incrementAndGet());
+  }
+
+  /**
+   * Map a bool to an effect.
+   * @param allowed is the statement to allow actions?
+   * @return the appropriate effect.
+   */
+  public static Effects effect(final boolean allowed) {
+    return allowed ? Effects.Allow : Effects.Deny;
+  }
+
+  /**
+   * Create a resource.
+   * @param bucket bucket
+   * @param key key
+   * @param addWildcard add a * to the tail of the key?
+   * @return a resource for a statement.
+   */
+  @SuppressWarnings("StringConcatenationMissingWhitespace")
+  public static String resource(String bucket, String key,
+      boolean addWildcard) {
+    return String.format(BUCKET_RESOURCE_F, bucket,
+        key + (addWildcard ? "*" : ""));
+  }
+
+  /**
+   * Given a path, return the S3 resource to it.
+   * If {@code isDirectory} is true, a "/" is added to the path.
+   * This is critical when adding wildcard permissions under
+   * a directory, and also needed when locking down dir-as-file
+   * and dir-as-directory-marker access.
+   * @param path a path
+   * @param isDirectory is this a directory?
+   * @param addWildcard add a * to the tail of the key?
+   * @return a resource for a statement.
+   */
+  public static String resource(Path path,
+      final boolean isDirectory,
+      boolean addWildcard) {
+    String key = pathToKey(path);
+    if (isDirectory && !key.isEmpty()) {
+      key = key + "/";
+    }
+    return resource(path.toUri().getHost(), key, addWildcard);
+  }
+
+  /**
+   * Given a directory path, return the S3 resource to it.
+   * @param path a path
+   * @return a resource for a statement.
+   */
+  public static String[] directory(Path path) {
+    String host = path.toUri().getHost();
+    String key = pathToKey(path);
+    if (!key.isEmpty()) {
+      return new String[] {
+          resource(host, key + "/", true),
+          resource(host, key, false),
+          resource(host, key + "/", false),
+      };
+    } else {
+      return new String[]{
+          resource(host, key, true),
+      };
+    }
+  }
+
+  /**
+   * Variant of {@link S3AFileSystem#pathToKey(Path)} which doesn't care
+   * about working directories, so can be static and stateless.
+   * @param path path to map
+   * @return key or ""
+   */
+  public static String pathToKey(Path path) {
+    if (path.toUri().getScheme() != null && path.toUri().getPath().isEmpty()) {
+      return "";
+    }
+    return path.toUri().getPath().substring(1);
+  }
+
+  /**
+   * Create a statement.
+   * @param allow allow or deny
+   * @param scope scope
+   * @param actions actions
+   * @return the formatted json statement
+   */
+  public static Statement statement(boolean allow,
+      String scope,
+      String... actions) {
+    return new Statement(RoleModel.effect(allow))
+        .addActions(actions)
+        .addResources(scope);
+  }
+
+  /**
+   * Create a statement.
+   * If {@code isDirectory} is true, a "/" is added to the path.
+   * This is critical when adding wildcard permissions under
+   * a directory, and also needed when locking down dir-as-file
+   * and dir-as-directory-marker access.
+   * @param allow allow or deny
+   * @param path path
+   * @param isDirectory is this a directory?
+   * @param actions action
+   * @return the formatted json statement
+   */
+  public static Statement statement(
+      final boolean allow,
+      final Path path,
+      final boolean isDirectory,
+      final boolean wildcards,
+      final String... actions) {
+    return new Statement(RoleModel.effect(allow))
+        .addActions(actions)
+        .addResources(resource(path, isDirectory, wildcards));
+  }
+
+  /**
+   * From a set of statements, create a policy.
+   * @param statements statements
+   * @return the policy
+   */
+  public static Policy policy(Statement... statements) {
+    return new Policy(statements);
+  }
+
+
+  /**
+   * Effect options.
+   */
+  public enum Effects {
+    Allow,
+    Deny
+  }
+
+  /**
+   * Any element in a role.
+   */
+  public static abstract class RoleElt {
+
+    protected RoleElt() {
+    }
+
+    /**
+     * validation operation.
+     */
+    public void validate() {
+
+    }
+  }
+
+  /**
+   * A single statement.
+   */
+  public static class Statement extends RoleElt {
+
+    @JsonProperty("Sid")
+    public String sid = newSid();
+
+    /**
+     * Default effect is Deny; forces callers to switch on Allow.
+     */
+    @JsonProperty("Effect")
+    public Effects effect;
+
+    @JsonProperty("Action")
+    public List<String> action = new ArrayList<>(1);
+
+    @JsonProperty("Resource")
+    public List<String> resource = new ArrayList<>(1);
+
+    public Statement(final Effects effect) {
+      this.effect = effect;
+    }
+
+    @Override
+    public void validate() {
+      checkNotNull(sid, "Sid");
+      checkNotNull(effect, "Effect");
+      checkState(!(action.isEmpty()), "Empty Action");
+      checkState(!(resource.isEmpty()), "Empty Resource");
+    }
+
+    public Statement setAllowed(boolean f) {
+      effect = effect(f);
+      return this;
+    }
+
+    public Statement addActions(String... actions) {
+      Collections.addAll(action, actions);
+      return this;
+    }
+
+    public Statement addResources(String... resources) {
+      Collections.addAll(resource, resources);
+      return this;
+    }
+
+  }
+
+  /**
+   * A policy is one or more statements.
+   */
+  public static class Policy extends RoleElt {
+
+    @JsonProperty("Version")
+    public String version = VERSION;
+
+    @JsonProperty("Statement")
+    public List<Statement> statement;
+
+    public Policy(final List<RoleModel.Statement> statement) {
+      this.statement = statement;
+    }
+
+    public Policy(RoleModel.Statement... statements) {
+      statement = Arrays.asList(statements);
+    }
+
+    /**
+     * Validation includes validating all statements.
+     */
+    @Override
+    public void validate() {
+      checkNotNull(statement, "Statement");
+      checkState(VERSION.equals(version), "Invalid Version: %s", version);
+      statement.stream().forEach((a) -> a.validate());
+    }
+
+  }
+
+
+}

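A hedged sketch of using `RoleModel` to generate policy JSON, e.g. from a test or a tool that populates `fs.s3a.assumed.role.policy`; the bucket and path below are hypothetical.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.auth.RoleModel;

import static org.apache.hadoop.fs.s3a.auth.RoleModel.policy;
import static org.apache.hadoop.fs.s3a.auth.RoleModel.statement;

public class RoleModelExample {
  public static void main(String[] args) throws Exception {
    // Allow read access to everything under a (hypothetical) directory.
    Path aliceHome = new Path("s3a://example-bucket/users/alice");
    RoleModel.Statement readOnly = statement(
        true,             // allow
        aliceHome,        // path
        true,             // treat as a directory: a "/" is added to the key
        true,             // add a trailing wildcard to the resource
        "s3:GetObject", "s3:ListBucket");

    RoleModel.Policy readOnlyPolicy = policy(readOnly);
    readOnlyPolicy.validate();

    // Serialize to the JSON form accepted by fs.s3a.assumed.role.policy.
    System.out.println(new RoleModel().toJson(readOnlyPolicy));
  }
}
```
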
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java
new file mode 100644
index 0000000..6711eee
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import static org.apache.hadoop.fs.s3a.auth.RoleModel.*;
+
+/**
+ * Operations, statements and policies covering the operations
+ * needed to work with S3 and S3Guard.
+ */
+public final class RolePolicies {
+
+  private RolePolicies() {
+  }
+
+  /**
+   * All S3 operations: {@value}.
+   */
+  public static final String S3_ALL_OPERATIONS = "s3:*";
+
+  /**
+   * All S3 buckets: {@value}.
+   */
+  public static final String S3_ALL_BUCKETS = "arn:aws:s3:::*";
+
+
+  public static final String S3_ALL_LIST_OPERATIONS = "s3:List*";
+
+  public static final String S3_ALL_LIST_BUCKET = "s3:ListBucket*";
+
+  public static final String S3_LIST_BUCKET = "s3:ListBucket";
+
+  /**
+   * This is used by the abort operation in S3A commit work.
+   */
+  public static final String S3_LIST_BUCKET_MULTPART_UPLOADS =
+      "s3:ListBucketMultipartUploads";
+
+
+  /**
+   * List multipart upload is needed for the S3A Commit protocols.
+   */
+  public static final String S3_LIST_MULTIPART_UPLOAD_PARTS
+      = "s3:ListMultipartUploadParts";
+
+  /**
+   * abort multipart upload is needed for the S3A Commit protocols.
+   */
+  public static final String S3_ABORT_MULTIPART_UPLOAD
+      = "s3:AbortMultipartUpload";
+
+  /**
+   * All s3:Delete* operations.
+   */
+  public static final String S3_ALL_DELETE = "s3:Delete*";
+
+
+  public static final String S3_DELETE_OBJECT = "s3:DeleteObject";
+
+  public static final String S3_DELETE_OBJECT_TAGGING
+      = "s3:DeleteObjectTagging";
+
+  public static final String S3_DELETE_OBJECT_VERSION
+      = "s3:DeleteObjectVersion";
+
+  public static final String S3_DELETE_OBJECT_VERSION_TAGGING
+      = "s3:DeleteObjectVersionTagging";
+
+  /**
+   * All s3:Get* operations.
+   */
+  public static final String S3_ALL_GET = "s3:Get*";
+
+  public static final String S3_GET_OBJECT = "s3:GetObject";
+
+  public static final String S3_GET_OBJECT_ACL = "s3:GetObjectAcl";
+
+  public static final String S3_GET_OBJECT_TAGGING = "s3:GetObjectTagging";
+
+  public static final String S3_GET_OBJECT_TORRENT = "s3:GetObjectTorrent";
+
+  public static final String S3_GET_OBJECT_VERSION = "s3:GetObjectVersion";
+
+  public static final String S3_GET_OBJECT_VERSION_ACL
+      = "s3:GetObjectVersionAcl";
+
+  public static final String S3_GET_OBJECT_VERSION_TAGGING
+      = "s3:GetObjectVersionTagging";
+
+  public static final String S3_GET_OBJECT_VERSION_TORRENT
+      = "s3:GetObjectVersionTorrent";
+
+
+  /**
+   * S3 Put*.
+   * This covers single and multipart uploads, but not list/abort of the latter.
+   */
+  public static final String S3_ALL_PUT = "s3:Put*";
+
+  public static final String S3_PUT_OBJECT = "s3:PutObject";
+
+  public static final String S3_PUT_OBJECT_ACL = "s3:PutObjectAcl";
+
+  public static final String S3_PUT_OBJECT_TAGGING = "s3:PutObjectTagging";
+
+  public static final String S3_PUT_OBJECT_VERSION_ACL
+      = "s3:PutObjectVersionAcl";
+
+  public static final String S3_PUT_OBJECT_VERSION_TAGGING
+      = "s3:PutObjectVersionTagging";
+
+  public static final String S3_RESTORE_OBJECT = "s3:RestoreObject";
+
+  /**
+   * Actions needed to read data from S3 through S3A.
+   */
+  public static final String[] S3_PATH_READ_OPERATIONS =
+      new String[]{
+          S3_GET_OBJECT,
+      };
+
+  /**
+   * Actions needed to read data from S3 through S3A, plus bucket listing.
+   */
+  public static final String[] S3_ROOT_READ_OPERATIONS =
+      new String[]{
+          S3_LIST_BUCKET,
+          S3_LIST_BUCKET_MULTPART_UPLOADS,
+          S3_GET_OBJECT,
+      };
+
+  /**
+   * Actions needed to write data to an S3A Path.
+   * This includes the appropriate read operations.
+   */
+  public static final String[] S3_PATH_RW_OPERATIONS =
+      new String[]{
+          S3_ALL_GET,
+          S3_PUT_OBJECT,
+          S3_DELETE_OBJECT,
+          S3_ABORT_MULTIPART_UPLOAD,
+          S3_LIST_MULTIPART_UPLOAD_PARTS,
+      };
+
+  /**
+   * Actions needed to write data to an S3A Path.
+   * This is purely the extra operations needed for writing atop
+   * of the read operation set.
+   * Deny these and a path is still readable, but not writeable.
+   */
+  public static final String[] S3_PATH_WRITE_OPERATIONS =
+      new String[]{
+          S3_PUT_OBJECT,
+          S3_DELETE_OBJECT,
+          S3_ABORT_MULTIPART_UPLOAD
+      };
+
+  /**
+   * Actions needed for R/W IO from the root of a bucket.
+   */
+  public static final String[] S3_ROOT_RW_OPERATIONS =
+      new String[]{
+          S3_LIST_BUCKET,
+          S3_ALL_GET,
+          S3_PUT_OBJECT,
+          S3_DELETE_OBJECT,
+          S3_ABORT_MULTIPART_UPLOAD,
+          S3_LIST_MULTIPART_UPLOAD_PARTS,
+          S3_ALL_LIST_BUCKET,
+      };
+
+  /**
+   * All DynamoDB operations: {@value}.
+   */
+  public static final String DDB_ALL_OPERATIONS = "dynamodb:*";
+
+  public static final String DDB_ADMIN = "dynamodb:*";
+
+
+  public static final String DDB_BATCH_WRITE = "dynamodb:BatchWriteItem";
+
+  /**
+   * All DynamoDB tables: {@value}.
+   */
+  public static final String ALL_DDB_TABLES = "arn:aws:dynamodb:::*";
+
+
+
+  public static final String WILDCARD = "*";
+
+  /**
+   * Allow all S3 Operations.
+   */
+  public static final Statement STATEMENT_ALL_S3 = statement(true,
+      S3_ALL_BUCKETS,
+      S3_ALL_OPERATIONS);
+
+  /**
+   * Statement to allow all DDB access.
+   */
+  public static final Statement STATEMENT_ALL_DDB = statement(true,
+      ALL_DDB_TABLES, DDB_ALL_OPERATIONS);
+
+  /**
+   * Allow all S3 and S3Guard operations.
+   */
+  public static final Policy ALLOW_S3_AND_SGUARD = policy(
+      STATEMENT_ALL_S3,
+      STATEMENT_ALL_DDB
+  );
+
+}

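To show how these constants compose with `RoleModel`, a sketch building a mixed policy along the lines of the read-only-bucket / writeable-home-directory example in the documentation further down. This illustrates the API only and is not a drop-in replacement for that JSON example; the home directory path is hypothetical.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.auth.RoleModel;

import static org.apache.hadoop.fs.s3a.auth.RoleModel.policy;
import static org.apache.hadoop.fs.s3a.auth.RoleModel.statement;
import static org.apache.hadoop.fs.s3a.auth.RolePolicies.S3_ALL_BUCKETS;
import static org.apache.hadoop.fs.s3a.auth.RolePolicies.S3_PATH_RW_OPERATIONS;
import static org.apache.hadoop.fs.s3a.auth.RolePolicies.S3_ROOT_READ_OPERATIONS;

public class MixedPermissionsPolicy {
  public static void main(String[] args) throws Exception {
    // Hypothetical home directory which gets full read/write access.
    Path aliceHome = new Path("s3a://example-bucket/users/alice");

    RoleModel.Policy mixed = policy(
        // Read-oriented operations granted broadly.
        statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
        // Read/write operations restricted to the home directory subtree.
        statement(true, aliceHome, true, true, S3_PATH_RW_OPERATIONS));
    mixed.validate();

    System.out.println(new RoleModel().toJson(mixed));
  }
}
```
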
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/package-info.java
new file mode 100644
index 0000000..e34d68e
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/package-info.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Authentication and permissions support.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+package org.apache.hadoop.fs.s3a.auth;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitOperations.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitOperations.java
index f6e12f4..55ace17 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitOperations.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitOperations.java
@@ -309,7 +309,7 @@ public class CommitOperations {
         } catch (FileNotFoundException e) {
           LOG.debug("listed file already deleted: {}", pendingFile);
         } catch (IOException | IllegalArgumentException e) {
-          if (outcome == null) {
+          if (MaybeIOE.NONE.equals(outcome)) {
             outcome = new MaybeIOE(makeIOE(pendingFile.toString(), e));
           }
         } finally {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md
index 50a9a0d..3afd63f 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md
@@ -26,6 +26,9 @@ to obtain the assumed role and refresh it regularly.
 By using per-filesystem configuration, it is possible to use different
 assumed roles for different buckets.
 
+*IAM Assumed Roles are unlikely to be supported by third-party systems
+supporting the S3 APIs.*
+
 ## Using IAM Assumed Roles
 
 ### Before You Begin
@@ -38,14 +41,13 @@ are, how to configure their policies, etc.
 * Have the AWS CLI installed, and test that it works there.
 * Give the role access to S3, and, if using S3Guard, to DynamoDB.
 
-
 Trying to learn how IAM Assumed Roles work by debugging stack traces from
 the S3A client is "suboptimal".
 
 ### <a name="how_it_works"></a> How the S3A connector support IAM Assumed Roles.
 
 To use assumed roles, the client must be configured to use the
-*Assumed Role Credential Provider*, `org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider`,
+*Assumed Role Credential Provider*, `org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider`,
 in the configuration option `fs.s3a.aws.credentials.provider`.
 
 This AWS Credential provider will read in the `fs.s3a.assumed.role` options needed to connect to the
@@ -54,7 +56,8 @@ first authenticating with the full credentials, then assuming the specific role
 specified. It will then refresh this login at the configured rate of
 `fs.s3a.assumed.role.session.duration`
 
-To authenticate with the STS service both for the initial credential retrieval
+To authenticate with the [AWS STS service](https://docs.aws.amazon.com/STS/latest/APIReference/Welcome.html)
+both for the initial credential retrieval
 and for background refreshes, a different credential provider must be
 created, one which uses long-lived credentials (secret keys, environment variables).
 Short lived credentials (e.g other session tokens, EC2 instance credentials) cannot be used.
@@ -76,6 +79,7 @@ the previously created ARN.
 <property>
   <name>fs.s3a.aws.credentials.provider</name>
   <value>org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider</value>
+  <value>org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider</value>
 </property>
 
 <property>
@@ -116,7 +120,7 @@ Here are the full set of configuration options.
   <value />
   <description>
     AWS ARN for the role to be assumed.
-    Requires the fs.s3a.aws.credentials.provider list to contain
+    Required if the fs.s3a.aws.credentials.provider contains
     org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider
   </description>
 </property>
@@ -127,23 +131,27 @@ Here are the full set of configuration options.
   <description>
     Session name for the assumed role, must be valid characters according to
     the AWS APIs.
+    Only used if AssumedRoleCredentialProvider is the AWS credential provider.
     If not set, one is generated from the current Hadoop/Kerberos username.
   </description>
 </property>
 
 <property>
-  <name>fs.s3a.assumed.role.session.duration</name>
-  <value>30m</value>
+  <name>fs.s3a.assumed.role.policy</name>
+  <value/>
   <description>
-    Duration of assumed roles before a refresh is attempted.
+    JSON policy to apply to the role.
+    Only used if AssumedRoleCredentialProvider is the AWS credential provider.
   </description>
 </property>
 
 <property>
-  <name>fs.s3a.assumed.role.policy</name>
-  <value/>
+  <name>fs.s3a.assumed.role.session.duration</name>
+  <value>30m</value>
   <description>
-    Extra policy containing more restrictions to apply to the role.
+    Duration of assumed roles before a refresh is attempted.
+    Only used if AssumedRoleCredentialProvider is the AWS credential provider.
+    Range: 15m to 1h
   </description>
 </property>
 
@@ -152,37 +160,173 @@ Here are the full set of configuration options.
   <value/>
   <description>
     AWS Simple Token Service Endpoint. If unset, uses the default endpoint.
+    Only used if AssumedRoleCredentialProvider is the AWS credential provider.
   </description>
 </property>
 
 <property>
   <name>fs.s3a.assumed.role.credentials.provider</name>
-  <value/>
+  <value>org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider</value>
   <description>
-    Credential providers used to authenticate with the STS endpoint and retrieve
-    the role tokens.
+    List of credential providers to authenticate with the STS endpoint and
+    retrieve short-lived role credentials.
+    Only used if AssumedRoleCredentialProvider is the AWS credential provider.
     If unset, uses "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider".
   </description>
 </property>
 ```
 
+## <a name="polices"></a> Restricting S3A operations through AWS Policies
+
+The S3A client needs to be granted specific permissions in order
+to work with a bucket.
+Here is a non-normative list of the permissions which must be granted
+for FileSystem operations to work.
+
+*Disclaimer* The specific set of actions which the S3A connector needs
+will change over time.
+
+As more operations are added to the S3A connector, and as the
+means by which existing operations are implemented change, the
+AWS actions which are required by the client will change.
+
+These lists represent the minimum set of actions which the client's
+principal must be granted in order to work with a bucket.
+
+
+### Read Access Permissions
+
+Permissions which must be granted when reading from a bucket:
+
+
+| Action | S3A operations |
+|--------|----------|
+| `s3:ListBucket` | `listStatus()`, `getFileStatus()` and elsewhere |
+| `s3:GetObject` | `getFileStatus()`, `open()` and elsewhere |
+| `s3:ListBucketMultipartUploads` |  Aborting/cleaning up S3A commit operations|
+
+
+The `s3:ListBucketMultipartUploads` permission is only needed when committing work
+via the [S3A committers](committers.html).
+However, it must be granted to the root path in order to safely clean up jobs.
+It is simplest to permit this in all buckets, even if it is only actually
+needed when writing data.
+
+
+### Write Access Permissions
+
+These permissions must *also* be granted for write access:
+
+
+| Action | S3A operations |
+|--------|----------|
+| `s3:PutObject` | `mkdir()`, `create()`, `rename()`, `delete()` |
+| `s3:DeleteObject` | `mkdir()`, `create()`, `rename()`, `delete()` |
+| `s3:AbortMultipartUpload` | S3A committer `abortJob()` and `cleanup()` operations |
+| `s3:ListMultipartUploadParts` | S3A committer `abortJob()` and `cleanup()` operations |
+
+
+### Mixed Permissions in a single S3 Bucket
+
+Mixing permissions down the "directory tree" is limited
+only to the extent of supporting writeable directories under
+read-only parent paths.
+
+*Disclaimer:* When a client lacks write access up the entire
+directory tree, there are no guarantees of consistent filesystem
+views or operations.
+
+Particular troublespots are "directory markers" and
+failures of non-atomic operations, particularly `rename()` and `delete()`.
+
+A directory marker such as `/users/` will not be deleted if the user `alice`
+creates a directory `/users/alice` *and* she only has access to `/users/alice`.
+
+When a path or directory is deleted, the parent directory may not exist afterwards.
+In the example above, if `alice` deletes `/users/alice` and there are no
+other entries under `/users`, then the directory marker `/users/` cannot
+be created. The directory `/users` will then not appear in listings,
+`getFileStatus("/users")` calls or similar.
+
+Rename will fail if it cannot delete the items it has just copied, that is
+`rename(read-only-source, writeable-dest)` will fail &mdash;but only after
+performing the COPY of the data.
+Even though the operation failed, for a single file copy, the destination
+file will exist.
+For a directory copy, only a partial copy of the source data may take place
+before the permission failure is raised.
+
+
+*S3Guard*: if [S3Guard](s3guard.html) is used to manage the directory listings,
+then after partial failures of rename/copy the DynamoDB tables can get out of sync.
+
+### Example: Read access to the base, R/W to the path underneath
+
+This example has the base bucket read only, and a directory underneath,
+`/users/alice/` granted full R/W access.
+
+```json
+{
+  "Version" : "2012-10-17",
+  "Statement" : [ {
+    "Sid" : "4",
+    "Effect" : "Allow",
+    "Action" : [
+      "s3:ListBucket",
+      "s3:ListBucketMultipartUploads",
+      "s3:GetObject"
+      ],
+    "Resource" : "arn:aws:s3:::example-bucket/*"
+  }, {
+    "Sid" : "5",
+    "Effect" : "Allow",
+    "Action" : [
+      "s3:Get*",
+      "s3:PutObject",
+      "s3:DeleteObject",
+      "s3:AbortMultipartUpload",
+      "s3:ListMultipartUploadParts" ],
+    "Resource" : [
+      "arn:aws:s3:::example-bucket/users/alice/*",
+      "arn:aws:s3:::example-bucket/users/alice",
+      "arn:aws:s3:::example-bucket/users/alice/"
+      ]
+  } ]
+}
+```
+
+Note how three resources are provided to represent the path `/users/alice`:
+
+|  Path | Matches |
+|-------|----------|
+| `/users/alice` |  Any file `alice` created under `/users` |
+| `/users/alice/` |  The directory marker `alice/` created under `/users` |
+| `/users/alice/*` |  All files and directories under the path `/users/alice` |
+
+Note that the resource `arn:aws:s3:::example-bucket/users/alice*` cannot
+be used to refer to all of these paths, because it would also cover
+adjacent paths like `/users/alice2` and `/users/alicebob`.
+
+
 ## <a name="troubleshooting"></a> Troubleshooting Assumed Roles
 
 1. Make sure the role works and the user trying to enter it can do so from AWS
 the command line before trying to use the S3A client.
 1. Try to access the S3 bucket with reads and writes from the AWS CLI.
-1. Then, with the hadoop settings updated, try to read data from the `hadoop fs` CLI:
+1. With the Hadoop configuration set to use the role,
+   try to read data from the `hadoop fs` CLI:
 `hadoop fs -ls -p s3a://bucket/`
-1. Then, with the hadoop CLI, try to create a new directory with a request such as
+1. With the hadoop CLI, try to create a new directory with a request such as
 `hadoop fs -mkdirs -p s3a://bucket/path/p1/`
 
+
 ### <a name="no_role"></a>IOException: "Unset property fs.s3a.assumed.role.arn"
 
 The Assumed Role Credential Provider is enabled, but `fs.s3a.assumed.role.arn` is unset.
 
 ```
 java.io.IOException: Unset property fs.s3a.assumed.role.arn
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:76)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:76)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -201,7 +345,7 @@ This can arise if the role ARN set in `fs.s3a.assumed.role.arn` is invalid
 or one to which the caller has no access.
 
 ```
-java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider
+java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider
  on : com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException:
   Not authorized to perform sts:AssumeRole (Service: AWSSecurityTokenService; Status Code: 403;
    Error Code: AccessDenied; Request ID: aad4e59a-f4b0-11e7-8c78-f36aaa9457f6):AccessDenied
@@ -217,12 +361,12 @@ java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.Assu
 
 ### <a name="root_account"></a> "Roles may not be assumed by root accounts"
 
-You can't use assume a role with the root acount of an AWS account;
+You can't assume a role with the root account of an AWS account;
 you need to create a new user and give it the permission to change into
 the role.
 
 ```
-java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider
+java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider
  on : com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException:
     Roles may not be assumed by root accounts. (Service: AWSSecurityTokenService; Status Code: 403; Error Code: AccessDenied;
     Request ID: e86dfd8f-e758-11e7-88e7-ad127c04b5e2):
@@ -257,7 +401,7 @@ The value of `fs.s3a.assumed.role.session.duration` is out of range.
 ```
 java.lang.IllegalArgumentException: Assume Role session duration should be in the range of 15min - 1Hr
   at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider$Builder.withRoleSessionDurationSeconds(STSAssumeRoleSessionCredentialsProvider.java:437)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:86)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:86)
 ```
 
 
@@ -268,7 +412,7 @@ The policy set in `fs.s3a.assumed.role.policy` is not valid according to the
 AWS specification of Role Policies.
 
 ```
-rg.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider on :
+org.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider on :
  com.amazonaws.services.securitytoken.model.MalformedPolicyDocumentException:
   The policy is not in the valid JSON format. (Service: AWSSecurityTokenService; Status Code: 400;
    Error Code: MalformedPolicyDocument; Request ID: baf8cb62-f552-11e7-9768-9df3b384e40c):
@@ -308,8 +452,8 @@ Caused by: com.amazonaws.services.securitytoken.model.MalformedPolicyDocumentExc
   at com.amazonaws.auth.RefreshableTask.blockingRefresh(RefreshableTask.java:212)
   at com.amazonaws.auth.RefreshableTask.getValue(RefreshableTask.java:153)
   at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.getCredentials(STSAssumeRoleSessionCredentialsProvider.java:299)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -324,7 +468,7 @@ The policy set in `fs.s3a.assumed.role.policy` is not valid JSON.
 
 ```
 org.apache.hadoop.fs.s3a.AWSBadRequestException:
-Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider on :
+Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider on :
  com.amazonaws.services.securitytoken.model.MalformedPolicyDocumentException:
   Syntax errors in policy. (Service: AWSSecurityTokenService;
   Status Code: 400; Error Code: MalformedPolicyDocument;
@@ -363,8 +507,8 @@ Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider on :
   at com.amazonaws.auth.RefreshableTask.blockingRefresh(RefreshableTask.java:212)
   at com.amazonaws.auth.RefreshableTask.getValue(RefreshableTask.java:153)
   at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.getCredentials(STSAssumeRoleSessionCredentialsProvider.java:299)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -380,7 +524,7 @@ You can't use the Assumed Role Credential Provider as the provider in
 
 ```
 java.io.IOException: AssumedRoleCredentialProvider cannot be in fs.s3a.assumed.role.credentials.provider
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:86)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:86)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -401,7 +545,7 @@ There's an space or other typo in the `fs.s3a.access.key` or `fs.s3a.secret.key`
 inner authentication which is breaking signature creation.
 
 ```
- org.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider
+ org.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider
   on : com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException:
    'valid/20180109/us-east-1/sts/aws4_request' not a valid key=value pair (missing equal-sign) in Authorization header:
     'AWS4-HMAC-SHA256 Credential=not valid/20180109/us-east-1/sts/aws4_request,
@@ -447,8 +591,8 @@ Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceExc
   at com.amazonaws.auth.RefreshableTask.blockingRefresh(RefreshableTask.java:212)
   at com.amazonaws.auth.RefreshableTask.getValue(RefreshableTask.java:153)
   at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.getCredentials(STSAssumeRoleSessionCredentialsProvider.java:299)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -463,7 +607,7 @@ The credentials used to authenticate with the AWS Simple Token Service are inval
 
 ```
 [ERROR] Failures:
-[ERROR] java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider on :
+[ERROR] java.nio.file.AccessDeniedException: : Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider on :
  com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException:
   The security token included in the request is invalid.
   (Service: AWSSecurityTokenService; Status Code: 403; Error Code: InvalidClientTokenId;
@@ -501,8 +645,8 @@ The security token included in the request is invalid.
   at com.amazonaws.auth.RefreshableTask.blockingRefresh(RefreshableTask.java:212)
   at com.amazonaws.auth.RefreshableTask.getValue(RefreshableTask.java:153)
   at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.getCredentials(STSAssumeRoleSessionCredentialsProvider.java:299)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:127)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:116)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -521,7 +665,7 @@ match these constraints.
 If set explicitly, it must be valid.
 
 ```
-org.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider on
+org.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider on
     com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException:
     1 validation error detected: Value 'Session Names cannot Hava Spaces!' at 'roleSessionName'
     failed to satisfy constraint: Member must satisfy regular expression pattern: [\w+=,.@-]*
@@ -584,8 +728,8 @@ Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceExc
   at com.amazonaws.auth.RefreshableTask.blockingRefresh(RefreshableTask.java:212)
   at com.amazonaws.auth.RefreshableTask.getValue(RefreshableTask.java:153)
   at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.getCredentials(STSAssumeRoleSessionCredentialsProvider.java:299)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:135)
-  at org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:124)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.getCredentials(AssumedRoleCredentialProvider.java:135)
+  at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.<init>(AssumedRoleCredentialProvider.java:124)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
@@ -593,3 +737,61 @@ Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceExc
   at org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProvider(S3AUtils.java:583)
   ... 26 more
 ```
+
+
+### <a name="access_denied"></a> `java.nio.file.AccessDeniedException` within a FileSystem API call
+
+If an operation fails with an `AccessDeniedException`, then the role does not have
+the permission for the S3 operation invoked during the call.
+
+```
+java.nio.file.AccessDeniedException: s3a://bucket/readonlyDir: rename(s3a://bucket/readonlyDir, s3a://bucket/renameDest)
+ on s3a://bucket/readonlyDir:
+  com.amazonaws.services.s3.model.AmazonS3Exception: Access Denied
+  (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 2805F2ABF5246BB1;
+   S3 Extended Request ID: iEXDVzjIyRbnkAc40MS8Sjv+uUQNvERRcqLsJsy9B0oyrjHLdkRKwJ/phFfA17Kjn483KSlyJNw=),
+   S3 Extended Request ID: iEXDVzjIyRbnkAc40MS8Sjv+uUQNvERRcqLsJsy9B0oyrjHLdkRKwJ/phFfA17Kjn483KSlyJNw=:AccessDenied
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:216)
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:143)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.rename(S3AFileSystem.java:853)
+ ...
+Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: Access Denied
+ (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 2805F2ABF5246BB1;
+  S3 Extended Request ID: iEXDVzjIyRbnkAc40MS8Sjv+uUQNvERRcqLsJsy9B0oyrjHLdkRKwJ/phFfA17Kjn483KSlyJNw=),
+  S3 Extended Request ID: iEXDVzjIyRbnkAc40MS8Sjv+uUQNvERRcqLsJsy9B0oyrjHLdkRKwJ/phFfA17Kjn483KSlyJNw=
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1638)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1303)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1055)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:743)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:717)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
+  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4229)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4176)
+  at com.amazonaws.services.s3.AmazonS3Client.deleteObject(AmazonS3Client.java:2066)
+  at com.amazonaws.services.s3.AmazonS3Client.deleteObject(AmazonS3Client.java:2052)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$deleteObject$7(S3AFileSystem.java:1338)
+  at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:314)
+  at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:280)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.deleteObject(S3AFileSystem.java:1334)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.removeKeys(S3AFileSystem.java:1657)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.innerRename(S3AFileSystem.java:1046)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.rename(S3AFileSystem.java:851)
+```
+
+This is the policy restriction behaving as intended: the caller is trying to
+perform an action which is forbidden.
+
+1. If a policy has been set in `fs.s3a.assumed.role.policy` then it must declare *all*
+permissions which the caller is allowed to perform. The existing role policies
+act as an outer constraint on what the caller can perform, but are not inherited.
+
+1. If the policy for a bucket is set up with complex rules on different paths,
+check the path for the operation.
+
+1. The policy may have omitted one or more actions which are required.
+Make sure that all the read and write permissions are allowed for any bucket/path
+to which data is written, and read permissions for all buckets read
+from (see the sketch after this list).
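
For illustration only (this block is not part of the patch above): a minimal
sketch of declaring the needed S3 actions in `fs.s3a.assumed.role.policy` from
client code. The role ARN, bucket name and action list are placeholders and
must be adjusted to whatever the workload actually performs.

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class AssumedRolePolicySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder ARN: a role which the IAM user is allowed to assume.
    conf.set("fs.s3a.assumed.role.arn",
        "arn:aws:iam::123456789012:role/role-s3-restricted");
    // Declare *all* S3 actions the workload needs; the role's own policies
    // only act as an outer bound and are not inherited.
    conf.set("fs.s3a.assumed.role.policy",
        "{\"Version\":\"2012-10-17\",\"Statement\":[{"
            + "\"Effect\":\"Allow\","
            + "\"Action\":[\"s3:GetObject\",\"s3:PutObject\","
            + "\"s3:DeleteObject\",\"s3:ListBucket\"],"
            + "\"Resource\":\"arn:aws:s3:::example-bucket*\"}]}");
    conf.set("fs.s3a.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider");
    try (FileSystem fs = FileSystem.get(new URI("s3a://example-bucket/"), conf)) {
      System.out.println("Connected as assumed role: " + fs.getUri());
    }
  }
}
```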

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index d2edce2..4924b45 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -1043,24 +1043,25 @@ If this role is not set, the tests which require it will be skipped.
 
 To run the tests in `ITestAssumeRole`, you need:
 
-1. A role in your AWS account with the relevant access rights to
-the S3 buckets used in the tests, and ideally DynamoDB, for S3Guard.
+1. A role in your AWS account with full read and write access rights to
+the S3 bucket used in the tests, and ideally DynamoDB, for S3Guard.
 If your bucket is set up by default to use S3Guard, the role must have access
 to that service.
 
-1.  Your IAM User  to have the permissions to adopt that role.
+1.  Your IAM User to have the permissions to adopt that role.
 
 1. The role ARN must be set in `fs.s3a.assumed.role.arn`.
 
 ```xml
 <property>
   <name>fs.s3a.assumed.role.arn</name>
-  <value>arn:aws:kms:eu-west-1:00000000000:key/0000000-16c9-4832-a1a9-c8bbef25ec8b</value>
+  <value>arn:aws:iam::9878543210123:role/role-s3-restricted</value>
 </property>
 ```
 
-The tests don't do much other than verify that basic file IO works with the role,
-and trigger various failures.
+The tests assume the role with different subsets of permissions and verify
+that the S3A client (mostly) works when the caller has only write access
+to part of the directory tree.
 
 You can also run the entire test suite in an assumed role, a more
 thorough test, by switching to the credentials provider.
@@ -1068,7 +1069,7 @@ thorough test, by switching to the credentials provider.
 ```xml
 <property>
   <name>fs.s3a.aws.credentials.provider</name>
-  <value>org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider</value>
+  <value>org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider</value>
 </property>
 ```
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCpAssumedRole.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCpAssumedRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCpAssumedRole.java
deleted file mode 100644
index 94e7adf..0000000
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCpAssumedRole.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.contract.s3a;
-
-import org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider;
-
-import static org.apache.hadoop.fs.s3a.Constants.ASSUMED_ROLE_ARN;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.authenticationContains;
-
-/**
- * Run DistCP under an assumed role.
- * This is skipped if the FS is already set to run under an assumed role,
- * because it would duplicate that of the superclass.
- */
-public class ITestS3AContractDistCpAssumedRole extends ITestS3AContractDistCp {
-
-  @Override
-  public void setup() throws Exception {
-
-    super.setup();
-    // check for the fs having assumed roles
-    assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
-    assume("Already running as an assumed role",
-        !authenticationContains(getFileSystem().getConf(),
-            AssumedRoleCredentialProvider.NAME));
-  }
-
-  /**
-   * Probe for an ARN for the test FS.
-   * @return any ARN for the (previous created) filesystem.
-   */
-  private String getAssumedRoleARN() {
-    return getFileSystem().getConf().getTrimmed(ASSUMED_ROLE_ARN, "");
-  }
-}




[03/21] hadoop git commit: HADOOP-15090. Add ADL troubleshooting doc. Contributed by Steve Loughran.

Posted by ha...@apache.org.
HADOOP-15090. Add ADL troubleshooting doc.
Contributed by Steve Loughran.

(cherry picked from commit 58a2120e8a31307f19551f87be4e81d4fb626de1)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c9a373fb
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c9a373fb
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c9a373fb

Branch: refs/heads/HDFS-12996
Commit: c9a373fb14bbf826324c2547397f82b73bd466f4
Parents: 6ea7d78
Author: Steve Loughran <st...@apache.org>
Authored: Thu Feb 15 14:26:00 2018 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Feb 15 14:26:00 2018 +0000

----------------------------------------------------------------------
 .../src/site/markdown/index.md                  |   4 +
 .../src/site/markdown/troubleshooting_adl.md    | 146 +++++++++++++++++++
 2 files changed, 150 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/c9a373fb/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
index ca79321..d2b6edf 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
+++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
@@ -22,6 +22,10 @@ The `hadoop-azure-datalake` module provides support for integration with the
 [Azure Data Lake Store](https://azure.microsoft.com/en-in/documentation/services/data-lake-store/).
 This support comes via the JAR file `azure-datalake-store.jar`.
 
+### Related Documents
+
+* [Troubleshooting](troubleshooting_adl.html).
+
 ## Features
 
 * Read and write data stored in an Azure Data Lake Storage account.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c9a373fb/hadoop-tools/hadoop-azure-datalake/src/site/markdown/troubleshooting_adl.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/troubleshooting_adl.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/troubleshooting_adl.md
new file mode 100644
index 0000000..80b2a6f
--- /dev/null
+++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/troubleshooting_adl.md
@@ -0,0 +1,146 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+# Troubleshooting ADL
+
+<!-- MACRO{toc|fromDepth=1|toDepth=3} -->
+
+
+## Error messages
+
+
+### Error fetching access token:
+
+You aren't authenticated.
+
+### Error fetching access token:  JsonParseException
+
+This indicates a problem talking to the OAuth token endpoint.
+
+
+```
+Operation null failed with exception com.fasterxml.jackson.core.JsonParseException : Unexpected character ('<' (code 60)): expected a valid value (number, String, array, object, 'true', 'false' or 'null')
+  at [Source: sun.net.www.protocol.http.HttpURLConnection$HttpInputStream@211d30ed; line: 3, column: 2]
+  Last encountered exception thrown after 5 tries. [com.fasterxml.jackson.core.JsonParseException,com.fasterxml.jackson.core.JsonParseException,com.fasterxml.jackson.core.JsonParseException,com.fasterxml.jackson.core.JsonParseException,com.fasterxml.jackson.core.JsonParseException]
+  [ServerRequestId:null]
+  at com.microsoft.azure.datalake.store.ADLStoreClient.getExceptionFromResponse(ADLStoreClient.java:1147)
+  at com.microsoft.azure.datalake.store.ADLStoreClient.getDirectoryEntry(ADLStoreClient.java:725)
+  at org.apache.hadoop.fs.adl.AdlFileSystem.getFileStatus(AdlFileSystem.java:476)
+  at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1713)
+  at org.apache.hadoop.fs.contract.ContractTestUtils.rm(ContractTestUtils.java:397)
+  at org.apache.hadoop.fs.contract.ContractTestUtils.cleanup(ContractTestUtils.java:374)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.deleteTestDirInTeardown(AbstractFSContractTestBase.java:213)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.teardown(AbstractFSContractTestBase.java:204)
+  at org.apache.hadoop.fs.contract.AbstractContractOpenTest.teardown(AbstractContractOpenTest.java:64)
+  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
+  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+  at java.lang.reflect.Method.invoke(Method.java:498)
+  at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
+  at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
+  at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
+  at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:33)
+  at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
+  at org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
+  ```
+
+The endpoint for token refresh is wrong; the web site at the far end is returning HTML, which breaks the JSON parser.
+Fix: get the right endpoint from the web UI; make sure it ends in `oauth2/token`.
+
+If there is a proxy between the application and ADL, make sure that the JVM proxy
+settings are correct.
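
As an illustration (not part of the patch): a sketch of wiring up the
client-credential flow. The property names shown are assumed to be the ADL
connector's OAuth2 settings and should be checked against the connector
documentation; TENANT_ID, CLIENT_ID and CLIENT_SECRET are placeholders. The
key point is that the refresh URL must end in `oauth2/token`.

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AdlTokenEndpointSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.adl.oauth2.access.token.provider.type", "ClientCredential");
    // The refresh URL must be the full token endpoint for the tenant,
    // ending in oauth2/token; the tenant id here is a placeholder.
    conf.set("fs.adl.oauth2.refresh.url",
        "https://login.microsoftonline.com/TENANT_ID/oauth2/token");
    conf.set("fs.adl.oauth2.client.id", "CLIENT_ID");
    conf.set("fs.adl.oauth2.credential", "CLIENT_SECRET");
    try (FileSystem fs = FileSystem.get(
        new URI("adl://youraccount.azuredatalakestore.net/"), conf)) {
      // A simple probe; a JsonParseException here usually means the refresh URL
      // points at an HTML page rather than the token endpoint.
      System.out.println(fs.getFileStatus(new Path("/")));
    }
  }
}
```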
+
+### `UnknownHostException : yourcontainer.azuredatalakestore.net`
+
+The name of the ADL container is wrong, and does not resolve to any known container.
+
+
+```
+Operation MKDIRS failed with exception java.net.UnknownHostException : yourcontainer.azuredatalakestore.net
+Last encountered exception thrown after 5 tries. [java.net.UnknownHostException,java.net.UnknownHostException,java.net.UnknownHostException,java.net.UnknownHostException,java.net.UnknownHostException]
+  [ServerRequestId:null]
+  at com.microsoft.azure.datalake.store.ADLStoreClient.getExceptionFromResponse(ADLStoreClient.java:1147)
+  at com.microsoft.azure.datalake.store.ADLStoreClient.createDirectory(ADLStoreClient.java:582)
+  at org.apache.hadoop.fs.adl.AdlFileSystem.mkdirs(AdlFileSystem.java:598)
+  at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2305)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.mkdirs(AbstractFSContractTestBase.java:338)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.setup(AbstractFSContractTestBase.java:193)
+  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
+  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+  at java.lang.reflect.Method.invoke(Method.java:498)
+  at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
+  at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
+  at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
+  at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
+  at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
+  at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
+  at org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
+Caused by: java.net.UnknownHostException: yourcontainer.azuredatalakestore.net
+  at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:184)
+  at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
+  at java.net.Socket.connect(Socket.java:589)
+  at sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:668)
+  at sun.net.NetworkClient.doConnect(NetworkClient.java:175)
+  at sun.net.www.http.HttpClient.openServer(HttpClient.java:432)
+  at sun.net.www.http.HttpClient.openServer(HttpClient.java:527)
+  at sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264)
+  at sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:367)
+  at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:191)
+  at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1138)
+  at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1032)
+  at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:177)
+  at sun.net.www.protocol.http.HttpURLConnection.getOutputStream0(HttpURLConnection.java:1316)
+  at sun.net.www.protocol.http.HttpURLConnection.getOutputStream(HttpURLConnection.java:1291)
+  at sun.net.www.protocol.https.HttpsURLConnectionImpl.getOutputStream(HttpsURLConnectionImpl.java:250)
+  at com.microsoft.azure.datalake.store.HttpTransport.makeSingleCall(HttpTransport.java:273)
+  at com.microsoft.azure.datalake.store.HttpTransport.makeCall(HttpTransport.java:91)
+  at com.microsoft.azure.datalake.store.Core.mkdirs(Core.java:399)
+  at com.microsoft.azure.datalake.store.ADLStoreClient.createDirectory(ADLStoreClient.java:580)
+  ... 15 more
+```
+
+### ACL verification failed
+
+
+You are logged in but have no access to the ADL container.
+
+```
+[ERROR] testOpenReadZeroByteFile(org.apache.hadoop.fs.adl.live.TestAdlContractOpenLive)  Time elapsed: 3.392 s  <<< ERROR!
+org.apache.hadoop.security.AccessControlException: MKDIRS failed with error 0x83090aa2 (Forbidden. ACL verification failed. Either the resource does not exist or the user is not authorized to perform the requested operation.). [709ad9f6-725f-45a8-8231-e9327c52e79f][2017-11-28T07:06:30.3068084-08:00] [ServerRequestId:709ad9f6-725f-45a8-8231-e9327c52e79f]
+  at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
+  at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
+  at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
+  at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
+  at com.microsoft.azure.datalake.store.ADLStoreClient.getRemoteException(ADLStoreClient.java:1167)
+  at com.microsoft.azure.datalake.store.ADLStoreClient.getExceptionFromResponse(ADLStoreClient.java:1132)
+  at com.microsoft.azure.datalake.store.ADLStoreClient.createDirectory(ADLStoreClient.java:582)
+  at org.apache.hadoop.fs.adl.AdlFileSystem.mkdirs(AdlFileSystem.java:598)
+  at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2305)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.mkdirs(AbstractFSContractTestBase.java:338)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.setup(AbstractFSContractTestBase.java:193)
+  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
+  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+  at java.lang.reflect.Method.invoke(Method.java:498)
+  at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
+  at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
+  at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
+  at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
+  at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
+  at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
+  at org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
+```
+
+See "Adding the service principal to your ADL Account".




[08/21] hadoop git commit: xattr api cleanup

Posted by ha...@apache.org.
xattr api cleanup


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/da59acd8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/da59acd8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/da59acd8

Branch: refs/heads/HDFS-12996
Commit: da59acd8ca9ab5b49b988ffca64e8cce91c5f741
Parents: 481d79f
Author: Kihwal Lee <ki...@apache.org>
Authored: Thu Feb 15 11:11:55 2018 -0600
Committer: Kihwal Lee <ki...@apache.org>
Committed: Thu Feb 15 11:11:55 2018 -0600

----------------------------------------------------------------------
 .../hdfs/server/namenode/FSDirXAttrOp.java      |  3 +-
 .../hdfs/server/namenode/FSXAttrBaseTest.java   | 63 ++++++++++++++++----
 2 files changed, 51 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/da59acd8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
index 3223467..be3092c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
@@ -137,8 +137,7 @@ class FSDirXAttrOp {
     final boolean isRawPath = FSDirectory.isReservedRawName(src);
     final INodesInPath iip = fsd.resolvePath(pc, src, DirOp.READ);
     if (fsd.isPermissionEnabled()) {
-      /* To access xattr names, you need EXECUTE in the owning directory. */
-      fsd.checkParentAccess(pc, iip, FsAction.EXECUTE);
+      fsd.checkPathAccess(pc, iip, FsAction.READ);
     }
     final List<XAttr> all = FSDirXAttrOp.getXAttrs(fsd, iip);
     return XAttrPermissionFilter.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/da59acd8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java
index 43eeadf..b5f7573 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java
@@ -843,28 +843,37 @@ public class FSXAttrBaseTest {
     }
 
     /*
-     * Check that execute/scan access to the parent dir is sufficient to get
-     * xattr names.
+     * Check that execute/scan access to the parent dir is not
+     * sufficient to get xattr names.
      */
     fs.setPermission(path, new FsPermission((short) 0701));
     user.doAs(new PrivilegedExceptionAction<Object>() {
         @Override
         public Object run() throws Exception {
+        try {
           final FileSystem userFs = dfsCluster.getFileSystem();
           userFs.listXAttrs(childDir);
-          return null;
+          fail("expected AccessControlException");
+        } catch (AccessControlException ace) {
+          GenericTestUtils.assertExceptionContains("Permission denied", ace);
         }
+        return null;
+      }
       });
 
     /*
      * Test that xattrs in the "trusted" namespace are filtered correctly.
      */
+    // Allow the user to read child path.
+    fs.setPermission(childDir, new FsPermission((short) 0704));
     fs.setXAttr(childDir, "trusted.myxattr", "1234".getBytes());
     user.doAs(new PrivilegedExceptionAction<Object>() {
         @Override
         public Object run() throws Exception {
           final FileSystem userFs = dfsCluster.getFileSystem();
-          assertTrue(userFs.listXAttrs(childDir).size() == 1);
+          List<String> xattrs = userFs.listXAttrs(childDir);
+          assertTrue(xattrs.size() == 1);
+          assertEquals(name1, xattrs.get(0));
           return null;
         }
       });
@@ -1109,20 +1118,48 @@ public class FSXAttrBaseTest {
             }
 
             /*
-            * Test that only user who have parent directory execute access
-            *  can see raw.* xattrs returned from listXAttr
+            * Test that parent directory execute access is not sufficient
+            *  to see raw.* xattrs returned from listXAttrs
             */
-            // non-raw path
-            final List<String> xattrNames = userFs.listXAttrs(path);
-            assertTrue(xattrNames.size() == 0);
+            try {
+              // non-raw path
+              userFs.listXAttrs(path);
+              fail("listXAttr should have thrown AccessControlException");
+            } catch (AccessControlException ace) {
+              // expected
+            }
 
-            // raw path
-            List<String> rawXattrs = userFs.listXAttrs(rawPath);
-            assertTrue(rawXattrs.size() == 1);
-            assertTrue(rawXattrs.get(0).equals(raw1));
+            try {
+              // raw path
+              userFs.listXAttrs(rawPath);
+              fail("listXAttr should have thrown AccessControlException");
+            } catch (AccessControlException ace) {
+              // expected
+            }
             return null;
           }
         });
+      /*
+        Test that a user who has read access can list xattrs in the "raw.*" namespace
+       */
+      fs.setPermission(path, new FsPermission((short) 0751));
+      final Path childDir = new Path(path, "child" + pathCount);
+      FileSystem.mkdirs(fs, childDir, FsPermission.createImmutable((short)
+          0704));
+      final Path rawChildDir =
+          new Path("/.reserved/raw" + childDir.toString());
+      fs.setXAttr(rawChildDir, raw1, value1);
+      user.doAs(new PrivilegedExceptionAction<Object>() {
+        @Override
+        public Object run() throws Exception {
+          final FileSystem userFs = dfsCluster.getFileSystem();
+          // raw path
+          List<String> xattrs = userFs.listXAttrs(rawChildDir);
+          assertEquals(1, xattrs.size());
+          assertEquals(raw1, xattrs.get(0));
+          return null;
+        }
+      });
       fs.removeXAttr(rawPath, raw1);
     }
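
To illustrate what the new check means for a client (a sketch, not part of the
patch; the path is hypothetical): listing xattr names now requires READ access
on the path itself, so execute/scan access on the parent directory alone
results in an AccessControlException.

```java
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.AccessControlException;

public class ListXAttrsSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path dir = new Path("/data/child");   // hypothetical path
    try {
      // With this change, READ access on the path itself is required;
      // execute/scan access on the parent directory is no longer enough.
      List<String> names = fs.listXAttrs(dir);
      names.forEach(System.out::println);
    } catch (AccessControlException ace) {
      System.err.println("No READ access on " + dir + ": " + ace.getMessage());
    }
  }
}
```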
 




[11/21] hadoop git commit: YARN-7920. Simplify configuration for PlacementConstraints. Contributed by Wangda Tan.

Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md
new file mode 100644
index 0000000..6af62e7
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md
@@ -0,0 +1,136 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+Placement Constraints
+=====================
+
+<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
+
+
+Overview
+--------
+
+YARN allows applications to specify placement constraints in the form of data locality (preference to specific nodes or racks) or (non-overlapping) node labels. This document focuses on more expressive placement constraints in YARN. Such constraints can be crucial for the performance and resilience of applications, especially those that include long-running containers, such as services, machine-learning and streaming workloads.
+
+For example, it may be beneficial to co-locate the allocations of a job on the same rack (*affinity* constraints) to reduce network costs, spread allocations across machines (*anti-affinity* constraints) to minimize resource interference, or allow up to a specific number of allocations in a node group (*cardinality* constraints) to strike a balance between the two. Placement decisions also affect resilience. For example, allocations placed within the same cluster upgrade domain would go offline simultaneously.
+
+Applications can specify constraints without requiring knowledge of the underlying topology of the cluster (e.g., one does not need to specify the specific node or rack where their containers should be placed with constraints) or the other applications deployed. Currently **intra-application** constraints are supported, but the design that is followed is generic and support for constraints across applications will soon be added. Moreover, all constraints at the moment are **hard**, that is, if the constraints for a container cannot be satisfied due to the current cluster condition or conflicting constraints, the container request will remain pending or will get rejected.
+
+Note that in this document we use the notion of “allocation” to refer to a unit of resources (e.g., CPU and memory) that gets allocated in a node. In the current implementation of YARN, an allocation corresponds to a single container. However, in case an application uses an allocation to spawn more than one containers, an allocation could correspond to multiple containers.
+
+
+Quick Guide
+-----------
+
+We first describe how to enable scheduling with placement constraints and then provide examples of how to experiment with this feature using the distributed shell, an application that runs a given shell command on a set of containers.
+
+### Enabling placement constraints
+
+To enable placement constraints, the following property has to be set to `placement-processor` or `scheduler` in **conf/yarn-site.xml**:
+
+| Property | Description | Default value |
+|:-------- |:----------- |:------------- |
+| `yarn.resourcemanager.placement-constraints.handler` | Specify which handler will be used to process PlacementConstraints. Acceptable values are: `placement-processor`, `scheduler`, and `disabled`. | `disabled` |
+
+We now give more details about each of the three placement constraint handlers:
+
+* `placement-processor`: Using this handler, the placement of containers with constraints is determined as a pre-processing step before the capacity or the fair scheduler is called. Once the placement is decided, the capacity/fair scheduler is invoked to perform the actual allocation. The advantage of this handler is that it supports all constraint types (affinity, anti-affinity, cardinality). Moreover, it considers multiple containers at a time, which allows it to satisfy more constraints than a container-at-a-time approach can achieve. As it sits outside the main scheduler, it can be used by both the capacity and fair schedulers. Note that at the moment it does not account for task priorities within an application, given that such priorities might be conflicting with the placement constraints.
+* `scheduler`: Using this handler, containers with constraints will be placed by the main scheduler (as of now, only the capacity scheduler supports SchedulingRequests). It currently supports anti-affinity constraints (no affinity or cardinality). The advantage of this handler, when compared to the `placement-processor`, is that it follows the same ordering rules for queues (sorted by utilization, priority), apps (sorted by FIFO/fairness/priority) and tasks within the same app (priority) that are enforced by the existing main scheduler.
+* `disabled`: Using this handler, if an application submits a SchedulingRequest, the corresponding allocate call will be rejected.
+
+The `placement-processor` handler supports a wider range of constraints and can allow more containers to be placed, especially when applications have demanding constraints or the cluster is highly-utilized (due to considering multiple containers at a time). However, if respecting task priority within an application is important for the user and the capacity scheduler is used, then the `scheduler` handler should be used instead.
+
+### Experimenting with placement constraints using distributed shell
+
+Users can experiment with placement constraints by using the distributed shell application through the following command:
+
+```
+$ yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar share/hadoop/yarn/hadoop-yarn-applications-distributedshell-${project.version}.jar -shell_command sleep -shell_args 10 -placement_spec PlacementSpec
+```
+
+where **PlacementSpec** is of the form:
+
+```
+PlacementSpec => "" | KeyVal;PlacementSpec
+KeyVal        => SourceTag=Constraint
+SourceTag     => String
+Constraint    => NumContainers | NumContainers,"IN",Scope,TargetTag | NumContainers,"NOTIN",Scope,TargetTag | NumContainers,"CARDINALITY",Scope,TargetTag,MinCard,MaxCard
+NumContainers => int
+Scope         => "NODE" | "RACK"
+TargetTag     => String
+MinCard       => int
+MaxCard       => int
+```
+
+Note that when the `-placement_spec` argument is specified in the distributed shell command, the `-num-containers` argument should not be used. In case the `-num-containers` argument is used in conjunction with `-placement_spec`, the former is ignored. This is because in PlacementSpec, we determine the number of containers per tag, making the `-num-containers` redundant and possibly conflicting. Moreover, if `-placement_spec` is used, all containers will be requested with GUARANTEED execution type.
+
+An example of PlacementSpec is the following:
+```
+zk=3,NOTIN,NODE,zk:hbase=5,IN,RACK,zk:spark=7,CARDINALITY,NODE,hbase,1,3
+```
+The above encodes two constraints:
+* place 3 containers with tag "zk" (standing for ZooKeeper) with node anti-affinity to each other, i.e., do not place more than one container per node (notice that in this first constraint, the SourceTag and the TargetTag of the constraint coincide);
+* place 5 containers with tag "hbase" with affinity to a rack on which containers with tag "zk" are running (i.e., an "hbase" container should be placed on a rack where a "zk" container is running, given that "zk" is the TargetTag of the second constraint);
+* place 7 containers with tag "spark" on nodes that have at least one, but no more than three, containers with tag "hbase".
+
+
+
+Defining Placement Constraints
+------------------------------
+
+### Allocation tags
+
+Allocation tags are string tags that an application can associate with (groups of) its containers. Tags are used to identify components of applications. For example, an HBase Master allocation can be tagged with "hbase-m", and Region Servers with "hbase-rs". Other examples are "latency-critical" to refer to the more general demands of the allocation, or "app_0041" to denote the job ID. Allocation tags play a key role in constraints, as they make it possible to refer to multiple allocations that share a common tag.
+
+Note that instead of using the `ResourceRequest` object to define allocation tags, we use the new `SchedulingRequest` object. This has many similarities with the `ResourceRequest`, but better separates the sizing of the requested allocations (number and size of allocations, priority, execution type, etc.), and the constraints dictating how these allocations should be placed (resource name, relaxed locality). Applications can still use `ResourceRequest` objects, but in order to define allocation tags and constraints, they need to use the `SchedulingRequest` object. Within a single `AllocateRequest`, an application should use either the `ResourceRequest` or the `SchedulingRequest` objects, but not both of them.
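
For illustration (not part of this patch): a sketch of a `SchedulingRequest`
that tags its allocations. The builder methods shown are assumed from the YARN
records API; the request id, sizes and execution type are arbitrary.

```java
import java.util.Collections;

import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceSizing;
import org.apache.hadoop.yarn.api.records.SchedulingRequest;

public class TaggedRequestSketch {
  static SchedulingRequest hbaseRequest() {
    // Five GUARANTEED allocations tagged "hbase", each 1 GB / 1 vcore.
    return SchedulingRequest.newBuilder()
        .allocationRequestId(1L)
        .priority(Priority.newInstance(0))
        .executionType(ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED))
        .allocationTags(Collections.singleton("hbase"))
        .resourceSizing(ResourceSizing.newInstance(5, Resource.newInstance(1024, 1)))
        .build();
  }
}
```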
+
+#### Differences between node labels, node attributes and allocation tags
+
+The difference between allocation tags and node labels or node attributes (YARN-3409), is that allocation tags are attached to allocations and not to nodes. When an allocation gets allocated to a node by the scheduler, the set of tags of that allocation are automatically added to the node for the duration of the allocation. Hence, a node inherits the tags of the allocations that are currently allocated to the node. Likewise, a rack inherits the tags of its nodes. Moreover, similar to node labels and unlike node attributes, allocation tags have no value attached to them. As we show below, our constraints can refer to allocation tags, as well as node labels and node attributes.
+
+
+### Placement constraints API
+
+Applications can use the public API in the `PlacementConstraints` class to construct placement constraints. Before describing the methods for building constraints, we describe the methods of the `PlacementTargets` class that are used to construct the target expressions that will then be used in constraints:
+
+| Method | Description |
+|:------ |:----------- |
+| `allocationTag(String... allocationTags)` | Constructs a target expression on an allocation tag. It is satisfied if there are allocations with one of the given tags. |
+| `allocationTagToIntraApp(String... allocationTags)` | similar to `allocationTag(String...)`, but targeting only the containers of the application that will use this target (intra-application constraints). |
+| `nodePartition(String... nodePartitions)` | Constructs a target expression on a node partition. It is satisfied for nodes that belong to one of the `nodePartitions`. |
+| `nodeAttribute(String attributeKey, String... attributeValues)` | Constructs a target expression on a node attribute. It is satisfied if the specified node attribute has one of the specified values. |
+
+Note that the `nodeAttribute` method above is not yet functional, as it requires the ongoing node attributes feature.
+
+The methods of the `PlacementConstraints` class for building constraints are the following:
+
+| Method | Description |
+|:------ |:----------- |
+| `targetIn(String scope, TargetExpression... targetExpressions)` | Creates a constraint that requires allocations to be placed on nodes that satisfy all target expressions within the given scope (e.g., node or rack). For example, `targetIn(RACK, allocationTag("hbase-m"))`, allows allocations on nodes that belong to a rack that has at least one allocation with tag "hbase-m". |
+| `targetNotIn(String scope, TargetExpression... targetExpressions)` | Creates a constraint that requires allocations to be placed on nodes that belong to a scope (e.g., node or rack) that does not satisfy any of the target expressions. |
+| `cardinality(String scope, int minCardinality, int maxCardinality, String... allocationTags)` | Creates a constraint that restricts the number of allocations within a given scope (e.g., node or rack). For example, `cardinality(NODE, 3, 10, "zk")` is satisfied on nodes where there are no less than 3 allocations with tag "zk" and no more than 10. |
+| `minCardinality(String scope, int minCardinality, String... allocationTags)` | Similar to `cardinality(String, int, int, String...)`, but determines only the minimum cardinality (the maximum cardinality is unbound). |
+| `maxCardinality(String scope, int maxCardinality, String... allocationTags)` | Similar to `cardinality(String, int, int, String...)`, but determines only the maximum cardinality (the minimum cardinality is 0). |
+| `targetCardinality(String scope, int minCardinality, int maxCardinality, String... allocationTags)` | This constraint generalizes the cardinality and target constraints. Consider a set of nodes N that belongs to the scope specified in the constraint. If the target expressions are satisfied at least minCardinality times and at most maxCardinality times in the node set N, then the constraint is satisfied. For example, `targetCardinality(RACK, 2, 10, allocationTag("zk"))`, requires an allocation to be placed within a rack that has at least 2 and at most 10 other allocations with tag "zk". |
+
+The `PlacementConstraints` class also includes methods for building compound constraints (AND/OR expressions with multiple constraints). Adding support for compound constraints is work in progress.
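
For illustration (not part of this patch): a sketch that builds the three
constraints of the earlier PlacementSpec example using the methods listed
above. It assumes the helpers live in
`org.apache.hadoop.yarn.api.resource.PlacementConstraints` and that `build(...)`
wraps each expression into a `PlacementConstraint`.

```java
import org.apache.hadoop.yarn.api.resource.PlacementConstraint;

import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.NODE;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.RACK;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.build;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.cardinality;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetIn;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetNotIn;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.allocationTag;

public class ConstraintSketch {
  // "zk": node anti-affinity to other allocations tagged "zk".
  static final PlacementConstraint ZK_ANTI_AFFINITY =
      build(targetNotIn(NODE, allocationTag("zk")));

  // "hbase": rack affinity to allocations tagged "zk".
  static final PlacementConstraint HBASE_AFFINITY =
      build(targetIn(RACK, allocationTag("zk")));

  // "spark": nodes with at least 1 and at most 3 allocations tagged "hbase".
  static final PlacementConstraint SPARK_CARDINALITY =
      build(cardinality(NODE, 1, 3, "hbase"));
}
```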
+
+
+### Specifying constraints in applications
+
+Applications have to specify the containers for which each constraint will be enabled. To this end, applications can provide a mapping from a set of allocation tags (source tags) to a placement constraint. For example, an entry of this mapping could be "hbase"->constraint1, which means that constraint1 will be applied when scheduling each allocation with tag "hbase".
+
+When using the `placement-processor` handler (see [Enabling placement constraints](#Enabling_placement_constraints)), this constraint mapping is specified within the `RegisterApplicationMasterRequest`.
+
+When using the `scheduler` handler, the constraints can also be added at each `SchedulingRequest` object. Each such constraint is valid for the tag of that scheduling request. In case constraints are specified both at the `RegisterApplicationMasterRequest` and the scheduling requests, the latter override the former.
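
For illustration (not part of this patch): a sketch of handing the
source-tag-to-constraint mapping to the ResourceManager at registration time.
It assumes an `AMRMClient#registerApplicationMaster` overload that accepts the
constraint map; host, port and tracking URL are placeholders, and the map can
otherwise be set on the `RegisterApplicationMasterRequest` directly.

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.yarn.api.resource.PlacementConstraint;
import org.apache.hadoop.yarn.client.api.AMRMClient;

public class RegisterWithConstraintsSketch {
  static void register(AMRMClient<AMRMClient.ContainerRequest> amRmClient,
      PlacementConstraint hbaseRackAffinity) throws Exception {
    Map<Set<String>, PlacementConstraint> constraints = new HashMap<>();
    // Apply the constraint to every allocation carrying the source tag "hbase".
    constraints.put(Collections.singleton("hbase"), hbaseRackAffinity);
    // Placeholder host, port and tracking URL.
    amRmClient.registerApplicationMaster("am-host", 0, "", constraints);
  }
}
```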

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md.vm
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md.vm
deleted file mode 100644
index 7926eab..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PlacementConstraints.md.vm
+++ /dev/null
@@ -1,149 +0,0 @@
-<!---
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License. See accompanying LICENSE file.
--->
-
-#set ( $H3 = '###' )
-#set ( $H4 = '####' )
-#set ( $H5 = '#####' )
-
-Placement Constraints
-=====================
-
-<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
-
-
-Overview
---------
-
-YARN allows applications to specify placement constraints in the form of data locality (preference to specific nodes or racks) or (non-overlapping) node labels. This document focuses on more expressive placement constraints in YARN. Such constraints can be crucial for the performance and resilience of applications, especially those that include long-running containers, such as services, machine-learning and streaming workloads.
-
-For example, it may be beneficial to co-locate the allocations of a job on the same rack (*affinity* constraints) to reduce network costs, spread allocations across machines (*anti-affinity* constraints) to minimize resource interference, or allow up to a specific number of allocations in a node group (*cardinality* constraints) to strike a balance between the two. Placement decisions also affect resilience. For example, allocations placed within the same cluster upgrade domain would go offline simultaneously.
-
-The applications can specify constraints without requiring knowledge of the underlying topology of the cluster (e.g., one does not need to specify the specific node or rack where their containers should be placed with constraints) or the other applications deployed. Currently **intra-application** constraints are supported, but the design that is followed is generic and support for constraints across applications will soon be added. Moreover, all constraints at the moment are **hard**, that is, if the constraints for a container cannot be satisfied due to the current cluster condition or conflicting constraints, the container request gets rejected.
-
-Note that in this document we use the notion of “allocation” to refer to a unit of resources (e.g., CPU and memory) that gets allocated in a node. In the current implementation of YARN, an allocation corresponds to a single container. However, in case an application uses an allocation to spawn more than one containers, an allocation could correspond to multiple containers.
-
-
-Quick Guide
------------
-
-We first describe how to enable scheduling with placement constraints and then provide examples of how to experiment with this feature using the distributed shell, an application that allows to run a given shell command on a set of containers.
-
-$H3 Enabling placement constraints
-
-To enable placement constraints, the following property has to be set to **true** in **conf/yarn-site.xml**:
-
-| Property | Description | Default value |
-|:-------- |:----------- |:------------- |
-| `yarn.resourcemanager.placement-constraints.enabled` | Enables rich placement constraints. | `false` |
-
-
-Further, the user can choose between the following two alternatives for placing containers with constraints:
-
-* **Placement processor:** Following this approach, the placement of containers with constraints is determined as a pre-processing step before the capacity or the fair scheduler is called. Once the placement is decided, the capacity/fair scheduler is invoked to perform the actual allocation. The advantage of this approach is that it supports all constraint types (affinity, anti-affinity, cardinality). Moreover, it considers multiple containers at a time, which allows to satisfy more constraints than a container-at-a-time approach can achieve. As it sits outside the main scheduler, it can be used by both the capacity and fair schedulers. Note that at the moment it does not account for task priorities within an application, given that such priorities might be conflicting with the placement constraints.
-* **Placement allocator in capacity scheduler:** This approach places containers with constraints within the capacity scheduler. It currently supports anti-affinity constraints (no affinity or cardinality) and places one container at a time. However, it supports traditional task priorities within an application.
-
-The placement processor approach supports a wider range of constraints and can allow more containers to be placed especially when applications have demanding constraints or the cluster is highly-utilized (due to considering multiple containers at a time). However, if respecting task priority within an application is important for the user and the capacity scheduler is used, then the placement allocator in the capacity scheduler should be used instead.
-
-By default, the placement processor approach is enabled. To use the placement allocator in the capacity scheduler instead, the following parameter has to be set to **true** in the **conf/capacity-scheduler.xml**:
-
-| Property | Description | Default value |
-|:-------- |:----------- |:------------- |
-| `yarn.scheduler.capacity.scheduling-request.allowed` | When set to false, the placement processor is used; when set to true, the allocator inside the capacity scheduler is used. | `false` |
-
-
-
-$H3 Experimenting with placement constraints using distributed shell
-
-Users can experiment with placement constraints by using the distributed shell application through the following command:
-
-```
-$ yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar share/hadoop/yarn/hadoop-yarn-applications-distributedshell-${project.version}.jar -shell_command sleep -shell_args 10 -placement_spec PlacementSpec
-```
-
-where **PlacementSpec** is of the form:
-
-```
-PlacementSpec => "" | KeyVal;PlacementSpec
-KeyVal        => SourceTag=Constraint
-SourceTag     => String
-Constraint    => NumContainers | NumContainers,"IN",Scope,TargetTag | NumContainers,"NOTIN",Scope,TargetTag | NumContainers,"CARDINALITY",Scope,TargetTag,MinCard,MaxCard
-NumContainers => int
-Scope         => "NODE" | "RACK"
-TargetTag     => String
-MinCard       => int
-MaxCard       => int
-```
-
-Note that when the `-placement_spec` argument is specified in the distributed shell command, the `-num-containers` argument should not be used. In case `-num-containers` argument is used in conjunction with `-placement-spec`, the former is ignored. This is because in PlacementSpec, we determine the number of containers per tag, making the `-num-containers` redundant and possibly conflicting. Moreover, if `-placement_spec` is used, all containers will be requested with GUARANTEED execution type.
-
-An example of PlacementSpec is the following:
-```
-zk=3,NOTIN,NODE,zk:hbase=5,IN,RACK,zk:spark=7,CARDINALITY,NODE,hbase,1,3
-```
-The above encodes two constraints:
-* place 3 containers with tag "zk" (standing for ZooKeeper) with node anti-affinity to each other, i.e., do not place more than one container per node (notice that in this first constraint, the SourceTag and the TargetTag of the constraint coincide);
-* place 5 containers with tag "hbase" with affinity to a rack on which containers with tag "zk" are running (i.e., an "hbase" container should not be placed at a rack where an "zk" container is running, given that "zk" is the TargetTag of the second constraint);
-* place 7 container with tag "spark" in nodes that have at least one, but no more than three, containers, with tag "hbase".
-
-
-
-Defining Placement Constraints
-------------------------------
-
-$H3 Allocation tags
-
-Allocation tags are string tags that an application can associate with (groups of) its containers. Tags are used to identify components of applications. For example, an HBase Master allocation can be tagged with "hbase-m", and Region Servers with "hbase-rs". Other examples are "latency-critical" to refer to the more general demands of the allocation, or "app_0041" to denote the job ID. Allocation tags play a key role in constraints, as they allow to refer to multiple allocations that share a common tag.
-
-Note that instead of using the `ResourceRequest` object to define allocation tags, we use the new `SchedulingRequest` object. This has many similarities with the `ResourceRequest`, but better separates the sizing of the requested allocations (number and size of allocations, priority, execution type, etc.), and the constraints dictating how these allocations should be placed (resource name, relaxed locality). Applications can still use `ResourceRequest` objects, but in order to define allocation tags and constraints, they need to use the `SchedulingRequest` object. Within a single `AllocateRequest`, an application should use either the `ResourceRequest` or the `SchedulingRequest` objects, but not both of them.
-
-$H4 Differences between node labels, node attributes and allocation tags
-
-The difference between allocation tags and node labels or node attributes (YARN-3409), is that allocation tags are attached to allocations and not to nodes. When an allocation gets allocated to a node by the scheduler, the set of tags of that allocation are automatically added to the node for the duration of the allocation. Hence, a node inherits the tags of the allocations that are currently allocated to the node. Likewise, a rack inherits the tags of its nodes. Moreover, similar to node labels and unlike node attributes, allocation tags have no value attached to them. As we show below, our constraints can refer to allocation tags, as well as node labels and node attributes.
-
-
-$H3 Placement constraints API
-
-Applications can use the public API in the `PlacementConstraints` to construct placement constraint. Before describing the methods for building constraints, we describe the methods of the `PlacementTargets` class that are used to construct the target expressions that will then be used in constraints:
-
-| Method | Description |
-|:------ |:----------- |
-| `allocationTag(String... allocationTags)` | Constructs a target expression on an allocation tag. It is satisfied if there are allocations with one of the given tags. |
-| `allocationTagToIntraApp(String... allocationTags)` | similar to `allocationTag(String...)`, but targeting only the containers of the application that will use this target (intra-application constraints). |
-| `nodePartition(String... nodePartitions)` | Constructs a target expression on a node partition. It is satisfied for nodes that belong to one of the `nodePartitions`. |
-| `nodeAttribute(String attributeKey, String... attributeValues)` | Constructs a target expression on a node attribute. It is satisfied if the specified node attribute has one of the specified values. |
-
-Note that the `nodeAttribute` method above is not yet functional, as it requires the ongoing node attributes feature.
-
-The methods of the `PlacementConstraints` class for building constraints are the following:
-
-| Method | Description |
-|:------ |:----------- |
-| `targetIn(String scope, TargetExpression... targetExpressions)` | Creates a constraint that requires allocations to be placed on nodes that satisfy all target expressions within the given scope (e.g., node or rack). For example, `targetIn(RACK, allocationTag("hbase-m"))`, allows allocations on nodes that belong to a rack that has at least one allocation with tag "hbase-m". |
-| `targetNotIn(String scope, TargetExpression... targetExpressions)` | Creates a constraint that requires allocations to be placed on nodes that belong to a scope (e.g., node or rack) that does not satisfy any of the target expressions. |
-| `cardinality(String scope, int minCardinality, int maxCardinality, String... allocationTags)` | Creates a constraint that restricts the number of allocations within a given scope (e.g., node or rack). For example, `cardinality(NODE, 3, 10, "zk")` is satisfied on nodes where there are no less than 3 allocations with tag "zk" and no more than 10. |
-| `minCardinality(String scope, int minCardinality, String... allocationTags)` | Similar to `cardinality(String, int, int, String...)`, but determines only the minimum cardinality (the maximum cardinality is unbound). |
-| `maxCardinality(String scope, int maxCardinality, String... allocationTags)` | Similar to `cardinality(String, int, int, String...)`, but determines only the maximum cardinality (the minimum cardinality is 0). |
-| `targetCardinality(String scope, int minCardinality, int maxCardinality, String... allocationTags)` | This constraint generalizes the cardinality and target constraints. Consider a set of nodes N that belongs to the scope specified in the constraint. If the target expressions are satisfied at least minCardinality times and at most maxCardinality times in the node set N, then the constraint is satisfied. For example, `targetCardinality(RACK, 2, 10, allocationTag("zk"))` requires an allocation to be placed within a rack that has at least 2 and at most 10 other allocations with tag "zk". |
-
-The `PlacementConstraints` class also includes methods for building compound constraints (AND/OR expressions combining multiple constraints). Adding support for compound constraints is work in progress.
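A sketch of building full constraints with these helpers; it assumes the `NODE`/`RACK` scope constants and the `build(...)` wrapper exposed by the `PlacementConstraints` class, and the tags are again illustrative:

```java
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.NODE;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.RACK;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.cardinality;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetIn;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetNotIn;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.allocationTag;

import org.apache.hadoop.yarn.api.resource.PlacementConstraint;
import org.apache.hadoop.yarn.api.resource.PlacementConstraints;

public class ConstraintExamples {

  // Affinity: place on a rack that already hosts an "hbase-m" allocation.
  static final PlacementConstraint AFFINITY =
      PlacementConstraints.build(targetIn(RACK, allocationTag("hbase-m")));

  // Anti-affinity: avoid nodes that already run an "hbase-rs" allocation.
  static final PlacementConstraint ANTI_AFFINITY =
      PlacementConstraints.build(targetNotIn(NODE, allocationTag("hbase-rs")));

  // Cardinality: between 3 and 10 "zk" allocations per node.
  static final PlacementConstraint ZK_CARDINALITY =
      PlacementConstraints.build(cardinality(NODE, 3, 10, "zk"));
}
```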
-
-
-$H3 Specifying constraints in applications
-
-Applications have to specify the containers for which each constraint will be enabled. To this end, applications can provide a mapping from a set of allocation tags (source tags) to a placement constraint. For example, an entry of this mapping could be "hbase"->constraint1, which means that constraint1 will be applied when scheduling each allocation with tag "hbase".
-
-When using the placement processor approach (see [Enabling placement constraints](#Enabling_placement_constraints)), this constraint mapping is specified within the `RegisterApplicationMasterRequest`.
-
-When using the placement allocator in the capacity scheduler, the constraints can also be added to each `SchedulingRequest` object. Each such constraint is valid for the tag of that scheduling request. In case constraints are specified both at the `RegisterApplicationMasterRequest` and the scheduling requests, the latter override the former.
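A sketch of attaching such a source-tag-to-constraint mapping at registration time; it assumes a `setPlacementConstraints` setter matching the `getPlacementConstraints` call used by the placement processors in the diffs below, and the tag and constraint are illustrative:

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.resource.PlacementConstraint;

public class ConstraintRegistrationExample {

  /**
   * Attaches a constraint to every allocation tagged "hbase-rs"; the
   * antiAffinity argument is assumed to be built as in the earlier sketch.
   */
  public static RegisterApplicationMasterRequest withConstraints(
      RegisterApplicationMasterRequest request, PlacementConstraint antiAffinity) {
    Map<Set<String>, PlacementConstraint> constraints = new HashMap<>();
    constraints.put(Collections.singleton("hbase-rs"), antiAffinity);
    request.setPlacementConstraints(constraints);
    return request;
  }
}
```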
-




[12/21] hadoop git commit: YARN-7920. Simplify configuration for PlacementConstraints. Contributed by Wangda Tan.

Posted by ha...@apache.org.
YARN-7920. Simplify configuration for PlacementConstraints. Contributed by Wangda Tan.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0b489e56
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0b489e56
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0b489e56

Branch: refs/heads/HDFS-12996
Commit: 0b489e564ce5a50324a530e29c18aa8a75276c50
Parents: 4747395
Author: Konstantinos Karanasos <kk...@apache.org>
Authored: Thu Feb 15 14:23:27 2018 -0800
Committer: Konstantinos Karanasos <kk...@apache.org>
Committed: Thu Feb 15 14:23:38 2018 -0800

----------------------------------------------------------------------
 .../hadoop/yarn/conf/YarnConfiguration.java     |  54 ++-
 .../TestAMRMClientPlacementConstraints.java     |   3 +-
 .../src/main/resources/yarn-default.xml         |  10 +-
 .../ApplicationMasterService.java               |  46 ++-
 .../scheduler/capacity/CapacityScheduler.java   |  13 -
 .../CapacitySchedulerConfiguration.java         |   5 -
 .../processor/AbstractPlacementProcessor.java   |  96 +++++
 .../processor/DisabledPlacementProcessor.java   |  77 ++++
 .../processor/PlacementConstraintProcessor.java | 340 +++++++++++++++++
 .../processor/PlacementProcessor.java           | 377 -------------------
 .../processor/SchedulerPlacementProcessor.java  |  55 +++
 ...apacitySchedulerSchedulingRequestUpdate.java |   4 +
 ...estSchedulingRequestContainerAllocation.java |   8 +-
 ...hedulingRequestContainerAllocationAsync.java |   4 +-
 .../scheduler/capacity/TestUtils.java           |   4 +-
 .../constraint/TestPlacementProcessor.java      |  12 +-
 .../src/site/markdown/PlacementConstraints.md   | 136 +++++++
 .../site/markdown/PlacementConstraints.md.vm    | 149 --------
 18 files changed, 818 insertions(+), 575 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 118f9fb..6677478 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -532,11 +532,57 @@ public class YarnConfiguration extends Configuration {
   public static final String RM_SCHEDULER = 
     RM_PREFIX + "scheduler.class";
 
-  /** Enable rich placement constraints. */
-  public static final String RM_PLACEMENT_CONSTRAINTS_ENABLED =
-      RM_PREFIX + "placement-constraints.enabled";
+  /**
+   * Specify which handler will be used to process PlacementConstraints.
+   * For details on PlacementConstraints, please refer to
+   * {@link org.apache.hadoop.yarn.api.resource.PlacementConstraint}
+   */
+  @Private
+  public static final String RM_PLACEMENT_CONSTRAINTS_HANDLER =
+      RM_PREFIX + "placement-constraints.handler";
+
+  /**
+   * This handler rejects all allocate calls made by an application, if they
+   * contain a {@link org.apache.hadoop.yarn.api.records.SchedulingRequest}.
+   */
+  @Private
+  public static final String DISABLED_RM_PLACEMENT_CONSTRAINTS_HANDLER =
+      "disabled";
 
-  public static final boolean DEFAULT_RM_PLACEMENT_CONSTRAINTS_ENABLED = false;
+  /**
+   * Using this handler, the placement of containers with constraints is
+   * determined as a pre-processing step before the capacity or the fair
+   * scheduler is called. Once the placement is decided, the capacity/fair
+   * scheduler is invoked to perform the actual allocation. The advantage of
+   * this approach is that it supports all constraint types (affinity,
+   * anti-affinity, cardinality). Moreover, it considers multiple containers at
+   * a time, which makes it possible to satisfy more constraints than a container-at-a-time
+   * approach can achieve. As it sits outside the main scheduler, it can be used
+   * by both the capacity and fair schedulers. Note that at the moment it does
+   * not account for task priorities within an application, given that such
+   * priorities might be conflicting with the placement constraints.
+   */
+  @Private
+  public static final String PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER =
+      "placement-processor";
+
+  /**
+   * Using this handler, containers with constraints will be placed by the main
+   * scheduler. If the configured RM scheduler
+   * <pre>yarn.resourcemanager.scheduler.class</pre>
+   * cannot handle placement constraints, the corresponding SchedulingRequests
+   * will be rejected. As of now, only the capacity scheduler supports
+   * SchedulingRequests. In particular, it currently supports anti-affinity
+   * constraints (no affinity or cardinality) and places one container at a
+   * time. The advantage of this handler compared to the placement-processor is
+   * that it follows the same ordering rules for queues (sorted by utilization,
+   * priority) and apps (sorted by FIFO/fairness/priority) as the ones followed
+   * by the main scheduler.
+   */
+  @Private
+  public static final String
+      SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER =
+      "scheduler";
 
   /** Placement Algorithm. */
   public static final String RM_PLACEMENT_CONSTRAINTS_ALGORITHM_CLASS =

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientPlacementConstraints.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientPlacementConstraints.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientPlacementConstraints.java
index fdc8d58..0e88299 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientPlacementConstraints.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientPlacementConstraints.java
@@ -65,7 +65,8 @@ public class TestAMRMClientPlacementConstraints extends BaseAMRMClientTest {
     // mismatches between client and server
     teardown();
     conf = new YarnConfiguration();
-    conf.setBoolean(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ENABLED, true);
+    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER);
     createClusterAndStartApplication(conf);
 
     AMRMClient<AMRMClient.ContainerRequest> amClient =

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 509a040..adf8d8a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -131,9 +131,13 @@
   </property>
 
   <property>
-    <description>Enable Constraint Placement.</description>
-    <name>yarn.resourcemanager.placement-constraints.enabled</name>
-    <value>false</value>
+    <description>
+      Specify which handler will be used to process PlacementConstraints.
+      Acceptable values are: `placement-processor`, `scheduler` and `disabled`.
+      For a detailed explanation of these values, please refer to the documentation.
+    </description>
+    <name>yarn.resourcemanager.placement-constraints.handler</name>
+    <value>disabled</value>
   </property>
 
   <property>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
index aa1177d..ae28879 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
@@ -59,7 +59,6 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor.PlacementProcessor;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
@@ -67,6 +66,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor.AbstractPlacementProcessor;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor.DisabledPlacementProcessor;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor.PlacementConstraintProcessor;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor.SchedulerPlacementProcessor;
 import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
 import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider;
 import org.apache.hadoop.yarn.server.security.MasterKeyData;
@@ -118,20 +121,47 @@ public class ApplicationMasterService extends AbstractService implements
     initializeProcessingChain(conf);
   }
 
+  private void addPlacementConstraintHandler(Configuration conf) {
+    String placementConstraintsHandler =
+        conf.get(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+            YarnConfiguration.DISABLED_RM_PLACEMENT_CONSTRAINTS_HANDLER);
+    if (placementConstraintsHandler
+        .equals(YarnConfiguration.DISABLED_RM_PLACEMENT_CONSTRAINTS_HANDLER)) {
+      LOG.info(YarnConfiguration.DISABLED_RM_PLACEMENT_CONSTRAINTS_HANDLER
+          + " placement handler will be used, all scheduling requests will "
+          + "be rejected.");
+      amsProcessingChain.addProcessor(new DisabledPlacementProcessor());
+    } else if (placementConstraintsHandler
+        .equals(YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER)) {
+      LOG.info(YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER
+          + " placement handler will be used. Scheduling requests will be "
+          + "handled by the placement constraint processor");
+      amsProcessingChain.addProcessor(new PlacementConstraintProcessor());
+    } else if (placementConstraintsHandler
+        .equals(YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER)) {
+      LOG.info(YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER
+          + " placement handler will be used. Scheduling requests will be "
+          + "handled by the main scheduler.");
+      amsProcessingChain.addProcessor(new SchedulerPlacementProcessor());
+    }
+  }
+
   private void initializeProcessingChain(Configuration conf) {
     amsProcessingChain.init(rmContext, null);
-    boolean enablePlacementConstraints = conf.getBoolean(
-        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ENABLED,
-        YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_ENABLED);
-    if (enablePlacementConstraints) {
-      amsProcessingChain.addProcessor(new PlacementProcessor());
-    }
+    addPlacementConstraintHandler(conf);
+
     List<ApplicationMasterServiceProcessor> processors = getProcessorList(conf);
     if (processors != null) {
       Collections.reverse(processors);
       for (ApplicationMasterServiceProcessor p : processors) {
         // Ensure only single instance of PlacementProcessor is included
-        if (enablePlacementConstraints && p instanceof PlacementProcessor) {
+        if (p instanceof AbstractPlacementProcessor) {
+          LOG.warn("Found PlacementProcessor=" + p.getClass().getCanonicalName()
+              + " defined in "
+              + YarnConfiguration.RM_APPLICATION_MASTER_SERVICE_PROCESSORS
+              + ", however PlacementProcessor handler should be configured "
+              + "by using " + YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER
+              + ", this processor will be ignored.");
           continue;
         }
         this.amsProcessingChain.addProcessor(p);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index cd9d1373..ddab0c1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -63,7 +63,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.ResourceSizing;
 import org.apache.hadoop.yarn.api.records.SchedulingRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.SchedulerInvalidResoureRequestException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
@@ -1098,18 +1097,6 @@ public class CapacityScheduler extends
       return EMPTY_ALLOCATION;
     }
 
-    if ((!getConfiguration().getBoolean(
-        CapacitySchedulerConfiguration.SCHEDULING_REQUEST_ALLOWED,
-        CapacitySchedulerConfiguration.DEFAULT_SCHEDULING_REQUEST_ALLOWED))
-        && schedulingRequests != null && (!schedulingRequests.isEmpty())) {
-      throw new SchedulerInvalidResoureRequestException(
-          "Application attempt:" + applicationAttemptId
-              + " is using SchedulingRequest, which is disabled. Please update "
-              + CapacitySchedulerConfiguration.SCHEDULING_REQUEST_ALLOWED
-              + " to true in capacity-scheduler.xml in order to use this "
-              + "feature.");
-    }
-
     // The allocate may be the leftover from previous attempt, and it will
     // impact current attempt, such as confuse the request and allocation for
     // current attempt's AM container.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index 00733a1..e609be9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -77,11 +77,6 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
   
   @Private
   public static final String PREFIX = "yarn.scheduler.capacity.";
-
-  @Private
-  public static final String SCHEDULING_REQUEST_ALLOWED =
-      PREFIX + "scheduling-request.allowed";
-  public static final boolean DEFAULT_SCHEDULING_REQUEST_ALLOWED = false;
   
   @Private
   public static final String DOT = ".";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/AbstractPlacementProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/AbstractPlacementProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/AbstractPlacementProcessor.java
new file mode 100644
index 0000000..96ae623
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/AbstractPlacementProcessor.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor;
+
+import org.apache.hadoop.yarn.ams.ApplicationMasterServiceContext;
+import org.apache.hadoop.yarn.ams.ApplicationMasterServiceProcessor;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.resource.PlacementConstraint;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Base class for all PlacementProcessors.
+ */
+public abstract class AbstractPlacementProcessor implements
+    ApplicationMasterServiceProcessor{
+  private static final Logger LOG =
+      LoggerFactory.getLogger(AbstractPlacementProcessor.class);
+
+  protected ApplicationMasterServiceProcessor nextAMSProcessor;
+  protected AbstractYarnScheduler scheduler;
+  private PlacementConstraintManager constraintManager;
+
+  @Override
+  public void init(ApplicationMasterServiceContext amsContext,
+      ApplicationMasterServiceProcessor nextProcessor) {
+    this.nextAMSProcessor = nextProcessor;
+    this.scheduler =
+        (AbstractYarnScheduler) ((RMContextImpl) amsContext).getScheduler();
+    this.constraintManager =
+        ((RMContextImpl)amsContext).getPlacementConstraintManager();
+  }
+
+  @Override
+  public void registerApplicationMaster(
+      ApplicationAttemptId applicationAttemptId,
+      RegisterApplicationMasterRequest request,
+      RegisterApplicationMasterResponse response)
+      throws IOException, YarnException {
+    Map<Set<String>, PlacementConstraint> appPlacementConstraints =
+        request.getPlacementConstraints();
+    processPlacementConstraints(applicationAttemptId.getApplicationId(),
+        appPlacementConstraints);
+    nextAMSProcessor.registerApplicationMaster(applicationAttemptId, request,
+        response);
+  }
+
+  private void processPlacementConstraints(ApplicationId applicationId,
+      Map<Set<String>, PlacementConstraint> appPlacementConstraints) {
+    if (appPlacementConstraints != null && !appPlacementConstraints.isEmpty()) {
+      LOG.info("Constraints added for application [{}] against tags [{}]",
+          applicationId, appPlacementConstraints);
+      constraintManager.registerApplication(
+          applicationId, appPlacementConstraints);
+    }
+  }
+
+  @Override
+  public void finishApplicationMaster(ApplicationAttemptId applicationAttemptId,
+      FinishApplicationMasterRequest request,
+      FinishApplicationMasterResponse response) {
+    constraintManager.unregisterApplication(
+        applicationAttemptId.getApplicationId());
+    this.nextAMSProcessor.finishApplicationMaster(applicationAttemptId, request,
+        response);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/DisabledPlacementProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/DisabledPlacementProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/DisabledPlacementProcessor.java
new file mode 100644
index 0000000..0d093a7
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/DisabledPlacementProcessor.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor;
+
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/**
+ * Processor that rejects all SchedulingRequests.
+ */
+public class DisabledPlacementProcessor extends AbstractPlacementProcessor {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(DisabledPlacementProcessor.class);
+
+  @Override
+  public void registerApplicationMaster(
+      ApplicationAttemptId applicationAttemptId,
+      RegisterApplicationMasterRequest request,
+      RegisterApplicationMasterResponse response)
+      throws IOException, YarnException {
+    if (request.getPlacementConstraints() != null && !request
+        .getPlacementConstraints().isEmpty()) {
+      String message = "Found non empty placement constraints map in "
+          + "RegisterApplicationMasterRequest for application="
+          + applicationAttemptId.toString() + ", but the configured "
+          + YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER
+          + " cannot handle placement constraints. Rejecting this "
+          + "registerApplicationMaster operation";
+      LOG.warn(message);
+      throw new YarnException(message);
+    }
+    nextAMSProcessor.registerApplicationMaster(applicationAttemptId, request,
+        response);
+  }
+
+  @Override
+  public void allocate(ApplicationAttemptId appAttemptId,
+      AllocateRequest request, AllocateResponse response) throws YarnException {
+    if (request.getSchedulingRequests() != null && !request
+        .getSchedulingRequests().isEmpty()) {
+      String message = "Found non empty SchedulingRequest in "
+          + "AllocateRequest for application="
+          + appAttemptId.toString() + ", but the configured "
+          + YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER
+          + " cannot handle placement constraints. Rejecting this "
+          + "allocate operation";
+      LOG.warn(message);
+      throw new YarnException(message);
+    }
+    nextAMSProcessor.allocate(appAttemptId, request, response);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementConstraintProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementConstraintProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementConstraintProcessor.java
new file mode 100644
index 0000000..f089a19
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementConstraintProcessor.java
@@ -0,0 +1,340 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor;
+
+import org.apache.hadoop.yarn.ams.ApplicationMasterServiceContext;
+import org.apache.hadoop.yarn.ams.ApplicationMasterServiceProcessor;
+import org.apache.hadoop.yarn.ams.ApplicationMasterServiceUtils;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.RejectedSchedulingRequest;
+import org.apache.hadoop.yarn.api.records.RejectionReason;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceSizing;
+import org.apache.hadoop.yarn.api.records.SchedulingRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.algorithm.DefaultPlacementAlgorithm;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.ConstraintPlacementAlgorithm;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.PlacedSchedulingRequest;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.SchedulingRequestWithPlacementAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.SchedulingResponse;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.stream.Collectors;
+
+/**
+ * An ApplicationMasterServiceProcessor that performs Constrained placement of
+ * Scheduling Requests. It does the following:
+ * 1. All initialization.
+ * 2. Intercepts placement constraints from the register call and adds it to
+ *    the placement constraint manager.
+ * 3. Dispatches Scheduling Requests to the Planner.
+ */
+public class PlacementConstraintProcessor extends AbstractPlacementProcessor {
+
+  /**
+   * Wrapper over the SchedulingResponse that wires in the placement attempt
+   * and last attempted Node.
+   */
+  static final class Response extends SchedulingResponse {
+
+    private final int placementAttempt;
+    private final SchedulerNode attemptedNode;
+
+    private Response(boolean isSuccess, ApplicationId applicationId,
+        SchedulingRequest schedulingRequest, int placementAttempt,
+        SchedulerNode attemptedNode) {
+      super(isSuccess, applicationId, schedulingRequest);
+      this.placementAttempt = placementAttempt;
+      this.attemptedNode = attemptedNode;
+    }
+  }
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(PlacementConstraintProcessor.class);
+
+  private ExecutorService schedulingThreadPool;
+  private int retryAttempts;
+  private Map<ApplicationId, List<BatchedRequests>> requestsToRetry =
+      new ConcurrentHashMap<>();
+  private Map<ApplicationId, List<SchedulingRequest>> requestsToReject =
+      new ConcurrentHashMap<>();
+
+  private BatchedRequests.IteratorType iteratorType;
+  private PlacementDispatcher placementDispatcher;
+
+
+  @Override
+  public void init(ApplicationMasterServiceContext amsContext,
+      ApplicationMasterServiceProcessor nextProcessor) {
+    LOG.info("Initializing Constraint Placement Processor:");
+    super.init(amsContext, nextProcessor);
+
+    // Only the first class is considered - even if a comma separated
+    // list is provided. (This is for simplicity, since getInstances does a
+    // lot of good things by handling things correctly)
+    List<ConstraintPlacementAlgorithm> instances =
+        ((RMContextImpl) amsContext).getYarnConfiguration().getInstances(
+            YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ALGORITHM_CLASS,
+            ConstraintPlacementAlgorithm.class);
+    ConstraintPlacementAlgorithm algorithm = null;
+    if (instances != null && !instances.isEmpty()) {
+      algorithm = instances.get(0);
+    } else {
+      algorithm = new DefaultPlacementAlgorithm();
+    }
+    LOG.info("Placement Algorithm [{}]", algorithm.getClass().getName());
+
+    String iteratorName = ((RMContextImpl) amsContext).getYarnConfiguration()
+        .get(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ALGORITHM_ITERATOR,
+            BatchedRequests.IteratorType.SERIAL.name());
+    LOG.info("Placement Algorithm Iterator[{}]", iteratorName);
+    try {
+      iteratorType = BatchedRequests.IteratorType.valueOf(iteratorName);
+    } catch (IllegalArgumentException e) {
+      throw new YarnRuntimeException(
+          "Could not instantiate Placement Algorithm Iterator: ", e);
+    }
+
+    int algoPSize = ((RMContextImpl) amsContext).getYarnConfiguration().getInt(
+        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ALGORITHM_POOL_SIZE,
+        YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_ALGORITHM_POOL_SIZE);
+    this.placementDispatcher = new PlacementDispatcher();
+    this.placementDispatcher.init(
+        ((RMContextImpl)amsContext), algorithm, algoPSize);
+    LOG.info("Planning Algorithm pool size [{}]", algoPSize);
+
+    int schedPSize = ((RMContextImpl) amsContext).getYarnConfiguration().getInt(
+        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_SCHEDULER_POOL_SIZE,
+        YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_SCHEDULER_POOL_SIZE);
+    this.schedulingThreadPool = Executors.newFixedThreadPool(schedPSize);
+    LOG.info("Scheduler pool size [{}]", schedPSize);
+
+    // Number of times a request that is not satisfied by the scheduler
+    // can be retried.
+    this.retryAttempts =
+        ((RMContextImpl) amsContext).getYarnConfiguration().getInt(
+            YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS,
+            YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS);
+    LOG.info("Num retry attempts [{}]", this.retryAttempts);
+  }
+
+  @Override
+  public void allocate(ApplicationAttemptId appAttemptId,
+      AllocateRequest request, AllocateResponse response) throws YarnException {
+    // Copy the scheduling request since we will clear it later after sending
+    // to dispatcher
+    List<SchedulingRequest> schedulingRequests =
+        new ArrayList<>(request.getSchedulingRequests());
+    dispatchRequestsForPlacement(appAttemptId, schedulingRequests);
+    reDispatchRetryableRequests(appAttemptId);
+    schedulePlacedRequests(appAttemptId);
+
+    // Remove SchedulingRequest from AllocateRequest to avoid SchedulingRequest
+    // added to scheduler.
+    request.setSchedulingRequests(Collections.emptyList());
+
+    nextAMSProcessor.allocate(appAttemptId, request, response);
+
+    handleRejectedRequests(appAttemptId, response);
+  }
+
+  private void dispatchRequestsForPlacement(ApplicationAttemptId appAttemptId,
+      List<SchedulingRequest> schedulingRequests) {
+    if (schedulingRequests != null && !schedulingRequests.isEmpty()) {
+      // Normalize the Requests before dispatching
+      schedulingRequests.forEach(req -> {
+        Resource reqResource = req.getResourceSizing().getResources();
+        req.getResourceSizing()
+            .setResources(this.scheduler.getNormalizedResource(reqResource));
+      });
+      this.placementDispatcher.dispatch(new BatchedRequests(iteratorType,
+          appAttemptId.getApplicationId(), schedulingRequests, 1));
+    }
+  }
+
+  private void reDispatchRetryableRequests(ApplicationAttemptId appAttId) {
+    List<BatchedRequests> reqsToRetry =
+        this.requestsToRetry.get(appAttId.getApplicationId());
+    if (reqsToRetry != null && !reqsToRetry.isEmpty()) {
+      synchronized (reqsToRetry) {
+        for (BatchedRequests bReq: reqsToRetry) {
+          this.placementDispatcher.dispatch(bReq);
+        }
+        reqsToRetry.clear();
+      }
+    }
+  }
+
+  private void schedulePlacedRequests(ApplicationAttemptId appAttemptId) {
+    ApplicationId applicationId = appAttemptId.getApplicationId();
+    List<PlacedSchedulingRequest> placedSchedulingRequests =
+        this.placementDispatcher.pullPlacedRequests(applicationId);
+    for (PlacedSchedulingRequest placedReq : placedSchedulingRequests) {
+      SchedulingRequest sReq = placedReq.getSchedulingRequest();
+      for (SchedulerNode node : placedReq.getNodes()) {
+        final SchedulingRequest sReqClone =
+            SchedulingRequest.newInstance(sReq.getAllocationRequestId(),
+                sReq.getPriority(), sReq.getExecutionType(),
+                sReq.getAllocationTags(),
+                ResourceSizing.newInstance(
+                    sReq.getResourceSizing().getResources()),
+                sReq.getPlacementConstraint());
+        SchedulerApplicationAttempt applicationAttempt =
+            this.scheduler.getApplicationAttempt(appAttemptId);
+        Runnable task = () -> {
+          boolean success =
+              scheduler.attemptAllocationOnNode(
+                  applicationAttempt, sReqClone, node);
+          if (!success) {
+            LOG.warn("Unsuccessful allocation attempt [{}] for [{}]",
+                placedReq.getPlacementAttempt(), sReqClone);
+          }
+          handleSchedulingResponse(
+              new Response(success, applicationId, sReqClone,
+              placedReq.getPlacementAttempt(), node));
+        };
+        this.schedulingThreadPool.submit(task);
+      }
+    }
+  }
+
+  private void handleRejectedRequests(ApplicationAttemptId appAttemptId,
+      AllocateResponse response) {
+    List<SchedulingRequestWithPlacementAttempt> rejectedAlgoRequests =
+        this.placementDispatcher.pullRejectedRequests(
+            appAttemptId.getApplicationId());
+    if (rejectedAlgoRequests != null && !rejectedAlgoRequests.isEmpty()) {
+      LOG.warn("Following requests of [{}] were rejected by" +
+              " the PlacementAlgorithmOutput Algorithm: {}",
+          appAttemptId.getApplicationId(), rejectedAlgoRequests);
+      rejectedAlgoRequests.stream()
+          .filter(req -> req.getPlacementAttempt() < retryAttempts)
+          .forEach(req -> handleSchedulingResponse(
+              new Response(false, appAttemptId.getApplicationId(),
+                  req.getSchedulingRequest(), req.getPlacementAttempt(),
+                  null)));
+      ApplicationMasterServiceUtils.addToRejectedSchedulingRequests(response,
+          rejectedAlgoRequests.stream()
+              .filter(req -> req.getPlacementAttempt() >= retryAttempts)
+              .map(sr -> RejectedSchedulingRequest.newInstance(
+                  RejectionReason.COULD_NOT_PLACE_ON_NODE,
+                  sr.getSchedulingRequest()))
+              .collect(Collectors.toList()));
+    }
+    List<SchedulingRequest> rejectedRequests =
+        this.requestsToReject.get(appAttemptId.getApplicationId());
+    if (rejectedRequests != null && !rejectedRequests.isEmpty()) {
+      synchronized (rejectedRequests) {
+        LOG.warn("Following requests of [{}] exhausted all retry attempts " +
+                "trying to schedule on placed node: {}",
+            appAttemptId.getApplicationId(), rejectedRequests);
+        ApplicationMasterServiceUtils.addToRejectedSchedulingRequests(response,
+            rejectedRequests.stream()
+                .map(sr -> RejectedSchedulingRequest.newInstance(
+                    RejectionReason.COULD_NOT_SCHEDULE_ON_NODE, sr))
+                .collect(Collectors.toList()));
+        rejectedRequests.clear();
+      }
+    }
+  }
+
+  @Override
+  public void finishApplicationMaster(ApplicationAttemptId appAttemptId,
+      FinishApplicationMasterRequest request,
+      FinishApplicationMasterResponse response) {
+    placementDispatcher.clearApplicationState(appAttemptId.getApplicationId());
+    requestsToReject.remove(appAttemptId.getApplicationId());
+    requestsToRetry.remove(appAttemptId.getApplicationId());
+    super.finishApplicationMaster(appAttemptId, request, response);
+  }
+
+  private void handleSchedulingResponse(SchedulingResponse schedulerResponse) {
+    int placementAttempt = ((Response)schedulerResponse).placementAttempt;
+    // Retry this placement as it is not successful and we are still
+    // under max retry. The req is batched with other unsuccessful
+    // requests from the same app
+    if (!schedulerResponse.isSuccess() && placementAttempt < retryAttempts) {
+      List<BatchedRequests> reqsToRetry =
+          requestsToRetry.computeIfAbsent(
+              schedulerResponse.getApplicationId(),
+              k -> new ArrayList<>());
+      synchronized (reqsToRetry) {
+        addToRetryList(schedulerResponse, placementAttempt, reqsToRetry);
+      }
+      LOG.warn("Going to retry request for application [{}] after [{}]" +
+              " attempts: [{}]", schedulerResponse.getApplicationId(),
+          placementAttempt, schedulerResponse.getSchedulingRequest());
+    } else {
+      if (!schedulerResponse.isSuccess()) {
+        LOG.warn("Not retrying request for application [{}] after [{}]" +
+                " attempts: [{}]", schedulerResponse.getApplicationId(),
+            placementAttempt, schedulerResponse.getSchedulingRequest());
+        List<SchedulingRequest> reqsToReject =
+            requestsToReject.computeIfAbsent(
+                schedulerResponse.getApplicationId(),
+                k -> new ArrayList<>());
+        synchronized (reqsToReject) {
+          reqsToReject.add(schedulerResponse.getSchedulingRequest());
+        }
+      }
+    }
+  }
+
+  private void addToRetryList(SchedulingResponse schedulerResponse,
+      int placementAttempt, List<BatchedRequests> reqsToRetry) {
+    boolean isAdded = false;
+    for (BatchedRequests br : reqsToRetry) {
+      if (br.getPlacementAttempt() == placementAttempt + 1) {
+        br.addToBatch(schedulerResponse.getSchedulingRequest());
+        br.addToBlacklist(
+            schedulerResponse.getSchedulingRequest().getAllocationTags(),
+            ((Response) schedulerResponse).attemptedNode);
+        isAdded = true;
+        break;
+      }
+    }
+    if (!isAdded) {
+      BatchedRequests br = new BatchedRequests(iteratorType,
+          schedulerResponse.getApplicationId(),
+          Collections.singleton(schedulerResponse.getSchedulingRequest()),
+          placementAttempt + 1);
+      reqsToRetry.add(br);
+      br.addToBlacklist(
+          schedulerResponse.getSchedulingRequest().getAllocationTags(),
+          ((Response) schedulerResponse).attemptedNode);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementProcessor.java
deleted file mode 100644
index 9ce38f4..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/PlacementProcessor.java
+++ /dev/null
@@ -1,377 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor;
-
-import org.apache.hadoop.yarn.ams.ApplicationMasterServiceContext;
-import org.apache.hadoop.yarn.ams.ApplicationMasterServiceProcessor;
-import org.apache.hadoop.yarn.ams.ApplicationMasterServiceUtils;
-import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.RejectedSchedulingRequest;
-import org.apache.hadoop.yarn.api.records.RejectionReason;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.api.records.ResourceSizing;
-import org.apache.hadoop.yarn.api.records.SchedulingRequest;
-import org.apache.hadoop.yarn.api.resource.PlacementConstraint;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManager;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.algorithm.DefaultPlacementAlgorithm;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.ConstraintPlacementAlgorithm;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.PlacedSchedulingRequest;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.SchedulingRequestWithPlacementAttempt;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.api.SchedulingResponse;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.stream.Collectors;
-
-/**
- * An ApplicationMasterService Processor that performs Constrained placement of
- * Scheduling Requests. It does the following:
- * 1. All initialization.
- * 2. Intercepts placement constraints from the register call and adds it to
- *    the placement constraint manager.
- * 3. Dispatches Scheduling Requests to the Planner.
- */
-public class PlacementProcessor implements ApplicationMasterServiceProcessor {
-
-  /**
-   * Wrapper over the SchedulingResponse that wires in the placement attempt
-   * and last attempted Node.
-   */
-  static final class Response extends SchedulingResponse {
-
-    private final int placementAttempt;
-    private final SchedulerNode attemptedNode;
-
-    private Response(boolean isSuccess, ApplicationId applicationId,
-        SchedulingRequest schedulingRequest, int placementAttempt,
-        SchedulerNode attemptedNode) {
-      super(isSuccess, applicationId, schedulingRequest);
-      this.placementAttempt = placementAttempt;
-      this.attemptedNode = attemptedNode;
-    }
-  }
-
-  private static final Logger LOG =
-      LoggerFactory.getLogger(PlacementProcessor.class);
-  private PlacementConstraintManager constraintManager;
-  private ApplicationMasterServiceProcessor nextAMSProcessor;
-
-  private AbstractYarnScheduler scheduler;
-  private ExecutorService schedulingThreadPool;
-  private int retryAttempts;
-  private Map<ApplicationId, List<BatchedRequests>> requestsToRetry =
-      new ConcurrentHashMap<>();
-  private Map<ApplicationId, List<SchedulingRequest>> requestsToReject =
-      new ConcurrentHashMap<>();
-
-  private BatchedRequests.IteratorType iteratorType;
-  private PlacementDispatcher placementDispatcher;
-
-
-  @Override
-  public void init(ApplicationMasterServiceContext amsContext,
-      ApplicationMasterServiceProcessor nextProcessor) {
-    LOG.info("Initializing Constraint Placement Processor:");
-    this.nextAMSProcessor = nextProcessor;
-    this.constraintManager =
-        ((RMContextImpl)amsContext).getPlacementConstraintManager();
-
-    this.scheduler =
-        (AbstractYarnScheduler)((RMContextImpl)amsContext).getScheduler();
-    // Only the first class is considered - even if a comma separated
-    // list is provided. (This is for simplicity, since getInstances does a
-    // lot of good things by handling things correctly)
-    List<ConstraintPlacementAlgorithm> instances =
-        ((RMContextImpl) amsContext).getYarnConfiguration().getInstances(
-            YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ALGORITHM_CLASS,
-            ConstraintPlacementAlgorithm.class);
-    ConstraintPlacementAlgorithm algorithm = null;
-    if (instances != null && !instances.isEmpty()) {
-      algorithm = instances.get(0);
-    } else {
-      algorithm = new DefaultPlacementAlgorithm();
-    }
-    LOG.info("Placement Algorithm [{}]", algorithm.getClass().getName());
-
-    String iteratorName = ((RMContextImpl) amsContext).getYarnConfiguration()
-        .get(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ALGORITHM_ITERATOR,
-            BatchedRequests.IteratorType.SERIAL.name());
-    LOG.info("Placement Algorithm Iterator[{}]", iteratorName);
-    try {
-      iteratorType = BatchedRequests.IteratorType.valueOf(iteratorName);
-    } catch (IllegalArgumentException e) {
-      throw new YarnRuntimeException(
-          "Could not instantiate Placement Algorithm Iterator: ", e);
-    }
-
-    int algoPSize = ((RMContextImpl) amsContext).getYarnConfiguration().getInt(
-        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ALGORITHM_POOL_SIZE,
-        YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_ALGORITHM_POOL_SIZE);
-    this.placementDispatcher = new PlacementDispatcher();
-    this.placementDispatcher.init(
-        ((RMContextImpl)amsContext), algorithm, algoPSize);
-    LOG.info("Planning Algorithm pool size [{}]", algoPSize);
-
-    int schedPSize = ((RMContextImpl) amsContext).getYarnConfiguration().getInt(
-        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_SCHEDULER_POOL_SIZE,
-        YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_SCHEDULER_POOL_SIZE);
-    this.schedulingThreadPool = Executors.newFixedThreadPool(schedPSize);
-    LOG.info("Scheduler pool size [{}]", schedPSize);
-
-    // Number of times a request that is not satisfied by the scheduler
-    // can be retried.
-    this.retryAttempts =
-        ((RMContextImpl) amsContext).getYarnConfiguration().getInt(
-            YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS,
-            YarnConfiguration.DEFAULT_RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS);
-    LOG.info("Num retry attempts [{}]", this.retryAttempts);
-  }
-
-  @Override
-  public void registerApplicationMaster(ApplicationAttemptId appAttemptId,
-      RegisterApplicationMasterRequest request,
-      RegisterApplicationMasterResponse response)
-      throws IOException, YarnException {
-    Map<Set<String>, PlacementConstraint> appPlacementConstraints =
-        request.getPlacementConstraints();
-    processPlacementConstraints(
-        appAttemptId.getApplicationId(), appPlacementConstraints);
-    nextAMSProcessor.registerApplicationMaster(appAttemptId, request, response);
-  }
-
-  private void processPlacementConstraints(ApplicationId applicationId,
-      Map<Set<String>, PlacementConstraint> appPlacementConstraints) {
-    if (appPlacementConstraints != null && !appPlacementConstraints.isEmpty()) {
-      LOG.info("Constraints added for application [{}] against tags [{}]",
-          applicationId, appPlacementConstraints);
-      constraintManager.registerApplication(
-          applicationId, appPlacementConstraints);
-    }
-  }
-
-  @Override
-  public void allocate(ApplicationAttemptId appAttemptId,
-      AllocateRequest request, AllocateResponse response) throws YarnException {
-    // Copy the scheduling request since we will clear it later after sending
-    // to dispatcher
-    List<SchedulingRequest> schedulingRequests =
-        new ArrayList<>(request.getSchedulingRequests());
-    dispatchRequestsForPlacement(appAttemptId, schedulingRequests);
-    reDispatchRetryableRequests(appAttemptId);
-    schedulePlacedRequests(appAttemptId);
-
-    // Remove SchedulingRequest from AllocateRequest to avoid SchedulingRequest
-    // added to scheduler.
-    request.setSchedulingRequests(Collections.emptyList());
-
-    nextAMSProcessor.allocate(appAttemptId, request, response);
-
-    handleRejectedRequests(appAttemptId, response);
-  }
-
-  private void dispatchRequestsForPlacement(ApplicationAttemptId appAttemptId,
-      List<SchedulingRequest> schedulingRequests) {
-    if (schedulingRequests != null && !schedulingRequests.isEmpty()) {
-      // Normalize the Requests before dispatching
-      schedulingRequests.forEach(req -> {
-        Resource reqResource = req.getResourceSizing().getResources();
-        req.getResourceSizing()
-            .setResources(this.scheduler.getNormalizedResource(reqResource));
-      });
-      this.placementDispatcher.dispatch(new BatchedRequests(iteratorType,
-          appAttemptId.getApplicationId(), schedulingRequests, 1));
-    }
-  }
-
-  private void reDispatchRetryableRequests(ApplicationAttemptId appAttId) {
-    List<BatchedRequests> reqsToRetry =
-        this.requestsToRetry.get(appAttId.getApplicationId());
-    if (reqsToRetry != null && !reqsToRetry.isEmpty()) {
-      synchronized (reqsToRetry) {
-        for (BatchedRequests bReq: reqsToRetry) {
-          this.placementDispatcher.dispatch(bReq);
-        }
-        reqsToRetry.clear();
-      }
-    }
-  }
-
-  private void schedulePlacedRequests(ApplicationAttemptId appAttemptId) {
-    ApplicationId applicationId = appAttemptId.getApplicationId();
-    List<PlacedSchedulingRequest> placedSchedulingRequests =
-        this.placementDispatcher.pullPlacedRequests(applicationId);
-    for (PlacedSchedulingRequest placedReq : placedSchedulingRequests) {
-      SchedulingRequest sReq = placedReq.getSchedulingRequest();
-      for (SchedulerNode node : placedReq.getNodes()) {
-        final SchedulingRequest sReqClone =
-            SchedulingRequest.newInstance(sReq.getAllocationRequestId(),
-                sReq.getPriority(), sReq.getExecutionType(),
-                sReq.getAllocationTags(),
-                ResourceSizing.newInstance(
-                    sReq.getResourceSizing().getResources()),
-                sReq.getPlacementConstraint());
-        SchedulerApplicationAttempt applicationAttempt =
-            this.scheduler.getApplicationAttempt(appAttemptId);
-        Runnable task = () -> {
-          boolean success =
-              scheduler.attemptAllocationOnNode(
-                  applicationAttempt, sReqClone, node);
-          if (!success) {
-            LOG.warn("Unsuccessful allocation attempt [{}] for [{}]",
-                placedReq.getPlacementAttempt(), sReqClone);
-          }
-          handleSchedulingResponse(
-              new Response(success, applicationId, sReqClone,
-              placedReq.getPlacementAttempt(), node));
-        };
-        this.schedulingThreadPool.submit(task);
-      }
-    }
-  }
-
-  private void handleRejectedRequests(ApplicationAttemptId appAttemptId,
-      AllocateResponse response) {
-    List<SchedulingRequestWithPlacementAttempt> rejectedAlgoRequests =
-        this.placementDispatcher.pullRejectedRequests(
-            appAttemptId.getApplicationId());
-    if (rejectedAlgoRequests != null && !rejectedAlgoRequests.isEmpty()) {
-      LOG.warn("Following requests of [{}] were rejected by" +
-              " the PlacementAlgorithmOutput Algorithm: {}",
-          appAttemptId.getApplicationId(), rejectedAlgoRequests);
-      rejectedAlgoRequests.stream()
-          .filter(req -> req.getPlacementAttempt() < retryAttempts)
-          .forEach(req -> handleSchedulingResponse(
-              new Response(false, appAttemptId.getApplicationId(),
-                  req.getSchedulingRequest(), req.getPlacementAttempt(),
-                  null)));
-      ApplicationMasterServiceUtils.addToRejectedSchedulingRequests(response,
-          rejectedAlgoRequests.stream()
-              .filter(req -> req.getPlacementAttempt() >= retryAttempts)
-              .map(sr -> RejectedSchedulingRequest.newInstance(
-                  RejectionReason.COULD_NOT_PLACE_ON_NODE,
-                  sr.getSchedulingRequest()))
-              .collect(Collectors.toList()));
-    }
-    List<SchedulingRequest> rejectedRequests =
-        this.requestsToReject.get(appAttemptId.getApplicationId());
-    if (rejectedRequests != null && !rejectedRequests.isEmpty()) {
-      synchronized (rejectedRequests) {
-        LOG.warn("Following requests of [{}] exhausted all retry attempts " +
-                "trying to schedule on placed node: {}",
-            appAttemptId.getApplicationId(), rejectedRequests);
-        ApplicationMasterServiceUtils.addToRejectedSchedulingRequests(response,
-            rejectedRequests.stream()
-                .map(sr -> RejectedSchedulingRequest.newInstance(
-                    RejectionReason.COULD_NOT_SCHEDULE_ON_NODE, sr))
-                .collect(Collectors.toList()));
-        rejectedRequests.clear();
-      }
-    }
-  }
-
-  @Override
-  public void finishApplicationMaster(ApplicationAttemptId appAttemptId,
-      FinishApplicationMasterRequest request,
-      FinishApplicationMasterResponse response) {
-    constraintManager.unregisterApplication(appAttemptId.getApplicationId());
-    placementDispatcher.clearApplicationState(appAttemptId.getApplicationId());
-    requestsToReject.remove(appAttemptId.getApplicationId());
-    requestsToRetry.remove(appAttemptId.getApplicationId());
-    nextAMSProcessor.finishApplicationMaster(appAttemptId, request, response);
-  }
-
-  private void handleSchedulingResponse(SchedulingResponse schedulerResponse) {
-    int placementAttempt = ((Response)schedulerResponse).placementAttempt;
-    // Retry this placement as it is not successful and we are still
-    // under max retry. The req is batched with other unsuccessful
-    // requests from the same app
-    if (!schedulerResponse.isSuccess() && placementAttempt < retryAttempts) {
-      List<BatchedRequests> reqsToRetry =
-          requestsToRetry.computeIfAbsent(
-              schedulerResponse.getApplicationId(),
-              k -> new ArrayList<>());
-      synchronized (reqsToRetry) {
-        addToRetryList(schedulerResponse, placementAttempt, reqsToRetry);
-      }
-      LOG.warn("Going to retry request for application [{}] after [{}]" +
-              " attempts: [{}]", schedulerResponse.getApplicationId(),
-          placementAttempt, schedulerResponse.getSchedulingRequest());
-    } else {
-      if (!schedulerResponse.isSuccess()) {
-        LOG.warn("Not retrying request for application [{}] after [{}]" +
-                " attempts: [{}]", schedulerResponse.getApplicationId(),
-            placementAttempt, schedulerResponse.getSchedulingRequest());
-        List<SchedulingRequest> reqsToReject =
-            requestsToReject.computeIfAbsent(
-                schedulerResponse.getApplicationId(),
-                k -> new ArrayList<>());
-        synchronized (reqsToReject) {
-          reqsToReject.add(schedulerResponse.getSchedulingRequest());
-        }
-      }
-    }
-  }
-
-  private void addToRetryList(SchedulingResponse schedulerResponse,
-      int placementAttempt, List<BatchedRequests> reqsToRetry) {
-    boolean isAdded = false;
-    for (BatchedRequests br : reqsToRetry) {
-      if (br.getPlacementAttempt() == placementAttempt + 1) {
-        br.addToBatch(schedulerResponse.getSchedulingRequest());
-        br.addToBlacklist(
-            schedulerResponse.getSchedulingRequest().getAllocationTags(),
-            ((Response) schedulerResponse).attemptedNode);
-        isAdded = true;
-        break;
-      }
-    }
-    if (!isAdded) {
-      BatchedRequests br = new BatchedRequests(iteratorType,
-          schedulerResponse.getApplicationId(),
-          Collections.singleton(schedulerResponse.getSchedulingRequest()),
-          placementAttempt + 1);
-      reqsToRetry.add(br);
-      br.addToBlacklist(
-          schedulerResponse.getSchedulingRequest().getAllocationTags(),
-          ((Response) schedulerResponse).attemptedNode);
-    }
-  }
-}
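
For context, the retry handling deleted above works in two steps: a response
that failed but is still under RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS is
folded into a BatchedRequests bucket keyed by placementAttempt + 1, and the
node that was just tried is blacklisted for that batch; requests that exhaust
their attempts land in the reject list instead. A condensed sketch of that
pattern (illustrative only: the helper name is made up, while retryAttempts,
iteratorType and the BatchedRequests calls come from the deleted class):

    // Fold a failed response into the batch for the next placement attempt,
    // blacklisting the node that was just tried. Callers synchronize on
    // retryList, as the deleted handleSchedulingResponse() did.
    private void retryOrDrop(SchedulingResponse rsp, int attempt,
        SchedulerNode attemptedNode, List<BatchedRequests> retryList) {
      if (rsp.isSuccess() || attempt >= retryAttempts) {
        return;   // success, or out of attempts; surfaced via the reject list
      }
      for (BatchedRequests br : retryList) {
        if (br.getPlacementAttempt() == attempt + 1) {
          br.addToBatch(rsp.getSchedulingRequest());
          br.addToBlacklist(
              rsp.getSchedulingRequest().getAllocationTags(), attemptedNode);
          return;
        }
      }
      BatchedRequests batch = new BatchedRequests(iteratorType,
          rsp.getApplicationId(),
          Collections.singleton(rsp.getSchedulingRequest()), attempt + 1);
      batch.addToBlacklist(
          rsp.getSchedulingRequest().getAllocationTags(), attemptedNode);
      retryList.add(batch);
    }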

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/SchedulerPlacementProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/SchedulerPlacementProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/SchedulerPlacementProcessor.java
new file mode 100644
index 0000000..5332e34
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/processor/SchedulerPlacementProcessor.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.processor;
+
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Forwarding SchedulingRequests to be handled by the scheduler, as long as the
+ * scheduler supports SchedulingRequests.
+ */
+public class SchedulerPlacementProcessor extends AbstractPlacementProcessor {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(SchedulerPlacementProcessor.class);
+
+  @Override
+  public void allocate(ApplicationAttemptId appAttemptId,
+      AllocateRequest request, AllocateResponse response) throws YarnException {
+    if (request.getSchedulingRequests() != null
+        && !request.getSchedulingRequests().isEmpty()) {
+      if (!(scheduler instanceof CapacityScheduler)) {
+        String message = "Found non empty SchedulingRequest of "
+            + "AllocateRequest for application=" + appAttemptId.toString()
+            + ", however the configured scheduler="
+            + scheduler.getClass().getCanonicalName()
+            + " cannot handle placement constraints, rejecting this "
+            + "allocate operation";
+        LOG.warn(message);
+        throw new YarnException(message);
+      }
+    }
+    nextAMSProcessor.allocate(appAttemptId, request, response);
+  }
+}
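
The processor above only validates and forwards; which path handles
SchedulingRequests is now chosen by a single handler key, as the test changes
below show. A minimal configuration sketch of the two values exercised there
(key and constant names exactly as they appear in the diffs):

    YarnConfiguration conf = new YarnConfiguration();

    // Hand SchedulingRequests straight to the CapacityScheduler; the
    // SchedulerPlacementProcessor rejects them for any other scheduler.
    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
        YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);

    // Or route them through the constraint placement processor instead.
    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
        YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER);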

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSchedulingRequestUpdate.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSchedulingRequestUpdate.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSchedulingRequestUpdate.java
index 484d780..ee7e013 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSchedulingRequestUpdate.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSchedulingRequestUpdate.java
@@ -50,6 +50,8 @@ public class TestCapacitySchedulerSchedulingRequestUpdate
     Configuration conf = TestUtils.getConfigurationWithQueueLabels(
         new Configuration(false));
     conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
+    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
 
     final RMNodeLabelsManager mgr = new NullRMNodeLabelsManager();
     mgr.init(conf);
@@ -166,6 +168,8 @@ public class TestCapacitySchedulerSchedulingRequestUpdate
     Configuration conf = TestUtils.getConfigurationWithQueueLabels(
         new Configuration(false));
     conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
+    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
 
     final RMNodeLabelsManager mgr = new NullRMNodeLabelsManager();
     mgr.init(conf);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocation.java
index b297f79..27d8661 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocation.java
@@ -58,8 +58,8 @@ public class TestSchedulingRequestContainerAllocation {
   public void testIntraAppAntiAffinity() throws Exception {
     Configuration csConf = TestUtils.getConfigurationWithMultipleQueues(
         new Configuration());
-    csConf.setBoolean(CapacitySchedulerConfiguration.SCHEDULING_REQUEST_ALLOWED,
-        true);
+    csConf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
 
     // inject node label manager
     MockRM rm1 = new MockRM(csConf) {
@@ -141,8 +141,8 @@ public class TestSchedulingRequestContainerAllocation {
   public void testIntraAppAntiAffinityWithMultipleTags() throws Exception {
     Configuration csConf = TestUtils.getConfigurationWithMultipleQueues(
         new Configuration());
-    csConf.setBoolean(CapacitySchedulerConfiguration.SCHEDULING_REQUEST_ALLOWED,
-        true);
+    csConf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
 
     // inject node label manager
     MockRM rm1 = new MockRM(csConf) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocationAsync.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocationAsync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocationAsync.java
index fc1cb0d..d1d05dc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocationAsync.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestSchedulingRequestContainerAllocationAsync.java
@@ -57,13 +57,13 @@ public class TestSchedulingRequestContainerAllocationAsync {
   private void testIntraAppAntiAffinityAsync(int numThreads) throws Exception {
     Configuration csConf = TestUtils.getConfigurationWithMultipleQueues(
         new Configuration());
-    csConf.setBoolean(CapacitySchedulerConfiguration.SCHEDULING_REQUEST_ALLOWED,
-        true);
     csConf.setInt(
         CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_THREAD,
         numThreads);
     csConf.setInt(CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_PREFIX
         + ".scheduling-interval-ms", 0);
+    csConf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
 
     // inject node label manager
     MockRM rm1 = new MockRM(csConf) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
index 7180e24..fae63be 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
@@ -275,9 +275,7 @@ public class TestUtils {
   public static Configuration getConfigurationWithQueueLabels(Configuration config) {
     CapacitySchedulerConfiguration conf =
         new CapacitySchedulerConfiguration(config);
-    conf.setBoolean(CapacitySchedulerConfiguration.SCHEDULING_REQUEST_ALLOWED,
-        true);
-    
+
     // Define top-level queues
     conf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] {"a", "b", "c"});
     conf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0b489e56/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java
index c4c0b5d..e129a75 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java
@@ -86,8 +86,8 @@ public class TestPlacementProcessor {
     YarnConfiguration conf = new YarnConfiguration(csConf);
     conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
         ResourceScheduler.class);
-    conf.setBoolean(
-        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ENABLED, true);
+    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER);
     conf.setInt(
         YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS, 1);
     startRM(conf);
@@ -381,8 +381,8 @@ public class TestPlacementProcessor {
     YarnConfiguration conf = new YarnConfiguration(csConf);
     conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
         ResourceScheduler.class);
-    conf.setBoolean(
-        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ENABLED, true);
+    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER);
     startRM(conf);
 
     HashMap<NodeId, MockNM> nodes = new HashMap<>();
@@ -533,8 +533,8 @@ public class TestPlacementProcessor {
     YarnConfiguration conf = new YarnConfiguration(csConf);
     conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
         ResourceScheduler.class);
-    conf.setBoolean(
-        YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_ENABLED, true);
+    conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
+        YarnConfiguration.PROCESSOR_RM_PLACEMENT_CONSTRAINTS_HANDLER);
     conf.setInt(
         YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_RETRY_ATTEMPTS, 2);
     startRM(conf);



[15/21] hadoop git commit: YARN-7292. Retrospect Resource Profile Behavior for overriding capability. Contributed by Wangda Tan.

Posted by ha...@apache.org.
YARN-7292. Retrospect Resource Profile Behavior for overriding capability. Contributed by Wangda Tan.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/aae62991
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/aae62991
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/aae62991

Branch: refs/heads/HDFS-12996
Commit: aae629913cee0157c945a2c7384c7bf398f10616
Parents: 8013475
Author: Sunil G <su...@apache.org>
Authored: Fri Feb 16 12:37:40 2018 +0530
Committer: Sunil G <su...@apache.org>
Committed: Fri Feb 16 12:37:40 2018 +0530

----------------------------------------------------------------------
 .../yarn/api/records/ProfileCapability.java     | 173 -------------------
 .../yarn/api/records/ResourceRequest.java       |  43 +----
 .../src/main/proto/yarn_protos.proto            |   6 -
 .../distributedshell/ApplicationMaster.java     |  22 +--
 .../applications/distributedshell/Client.java   |  37 ++--
 .../hadoop/yarn/client/api/AMRMClient.java      |  77 ++-------
 .../yarn/client/api/impl/AMRMClientImpl.java    | 126 +++++++-------
 .../client/api/impl/RemoteRequestsTable.java    | 100 ++++-------
 .../yarn/client/api/impl/TestAMRMClient.java    |  73 +++-----
 .../impl/TestAMRMClientContainerRequest.java    |   5 +-
 .../yarn/client/api/impl/TestNMClient.java      |  37 ++--
 ...TestOpportunisticContainerAllocationE2E.java |  25 ++-
 .../impl/pb/ProfileCapabilityPBImpl.java        | 126 --------------
 .../records/impl/pb/ResourceRequestPBImpl.java  |  40 +----
 .../hadoop/yarn/api/TestPBImplRecords.java      |  10 --
 .../hadoop/yarn/api/TestProfileCapability.java  | 109 ------------
 .../hadoop/yarn/server/utils/BuilderUtils.java  |   1 -
 .../server/resourcemanager/RMServerUtils.java   |  33 ----
 .../scheduler/SchedulerUtils.java               |  10 --
 .../TestApplicationMasterService.java           |   6 -
 .../resource/MockResourceProfileManager.java    |  79 ---------
 ...CapacitySchedulerWithMultiResourceTypes.java | 110 ------------
 22 files changed, 183 insertions(+), 1065 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java
deleted file mode 100644
index d6cb635..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.api.records;
-
-import com.google.common.base.Preconditions;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.yarn.util.Records;
-
-import java.util.Map;
-
-/**
- * Class to capture capability requirements when using resource profiles. The
- * ProfileCapability is meant to be used as part of the ResourceRequest. A
- * profile capability has two pieces - the resource profile name and the
- * overrides. The resource profile specifies the name of the resource profile
- * to be used and the capability override is the overrides desired on specific
- * resource types.
- *
- * For example, if you have a resource profile "small" that maps to
- * {@literal <4096M, 2 cores, 1 gpu>} and you set the capability override to
- * {@literal <8192M, 0 cores, 0 gpu>}, then the actual resource allocation on
- * the ResourceManager will be {@literal <8192M, 2 cores, 1 gpu>}.
- *
- * Note that the conversion from the ProfileCapability to the Resource class
- * with the actual resource requirements will be done by the ResourceManager,
- * which has the actual profile to Resource mapping.
- *
- */
-@InterfaceAudience.Public
-@InterfaceStability.Unstable
-public abstract class ProfileCapability {
-
-  public static final String DEFAULT_PROFILE = "default";
-
-  public static ProfileCapability newInstance(Resource override) {
-    return newInstance(DEFAULT_PROFILE, override);
-  }
-
-  public static ProfileCapability newInstance(String profile) {
-    Preconditions
-        .checkArgument(profile != null, "The profile name cannot be null");
-    ProfileCapability obj = Records.newRecord(ProfileCapability.class);
-    obj.setProfileName(profile);
-    obj.setProfileCapabilityOverride(Resource.newInstance(0, 0));
-    return obj;
-  }
-
-  public static ProfileCapability newInstance(String profile,
-      Resource override) {
-    Preconditions
-        .checkArgument(profile != null, "The profile name cannot be null");
-    ProfileCapability obj = Records.newRecord(ProfileCapability.class);
-    obj.setProfileName(profile);
-    obj.setProfileCapabilityOverride(override);
-    return obj;
-  }
-
-  /**
-   * Get the profile name.
-   * @return the profile name
-   */
-  public abstract String getProfileName();
-
-  /**
-   * Get the profile capability override.
-   * @return Resource object containing the override.
-   */
-  public abstract Resource getProfileCapabilityOverride();
-
-  /**
-   * Set the resource profile name.
-   * @param profileName the resource profile name
-   */
-  public abstract void setProfileName(String profileName);
-
-  /**
-   * Set the capability override to override specific resource types on the
-   * resource profile.
-   *
-   * For example, if you have a resource profile "small" that maps to
-   * {@literal <4096M, 2 cores, 1 gpu>} and you set the capability override to
-   * {@literal <8192M, 0 cores, 0 gpu>}, then the actual resource allocation on
-   * the ResourceManager will be {@literal <8192M, 2 cores, 1 gpu>}.
-   *
-   * Note that the conversion from the ProfileCapability to the Resource class
-   * with the actual resource requirements will be done by the ResourceManager,
-   * which has the actual profile to Resource mapping.
-   *
-   * @param r Resource object containing the capability override
-   */
-  public abstract void setProfileCapabilityOverride(Resource r);
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    if (other == null || !(other instanceof ProfileCapability)) {
-      return false;
-    }
-    return ((ProfileCapability) other).getProfileName()
-        .equals(this.getProfileName()) && ((ProfileCapability) other)
-        .getProfileCapabilityOverride()
-        .equals(this.getProfileCapabilityOverride());
-  }
-
-  @Override
-  public int hashCode() {
-    final int prime = 2153;
-    int result = 2459;
-    String name = getProfileName();
-    Resource override = getProfileCapabilityOverride();
-    result = prime * result + ((name == null) ? 0 : name.hashCode());
-    result = prime * result + ((override == null) ? 0 : override.hashCode());
-    return result;
-  }
-
-  @Override
-  public String toString() {
-    return "{ profile: " + this.getProfileName() + ", capabilityOverride: "
-        + this.getProfileCapabilityOverride() + " }";
-  }
-
-  /**
-   * Get a representation of the capability as a Resource object.
-   * @param capability the capability we wish to convert
-   * @param resourceProfilesMap map of profile name to Resource object
-   * @return Resource object representing the capability
-   */
-  public static Resource toResource(ProfileCapability capability,
-      Map<String, Resource> resourceProfilesMap) {
-    Preconditions
-        .checkArgument(capability != null, "Capability cannot be null");
-    Preconditions.checkArgument(resourceProfilesMap != null,
-        "Resource profiles map cannot be null");
-    Resource none = Resource.newInstance(0, 0);
-    Resource resource = Resource.newInstance(0, 0);
-    String profileName = capability.getProfileName();
-    if (null == profileName || profileName.isEmpty()) {
-      profileName = DEFAULT_PROFILE;
-    }
-    if (resourceProfilesMap.containsKey(profileName)) {
-      resource = Resource.newInstance(resourceProfilesMap.get(profileName));
-    }
-    if (capability.getProfileCapabilityOverride() != null &&
-        !capability.getProfileCapabilityOverride().equals(none)) {
-      for (ResourceInformation entry : capability
-          .getProfileCapabilityOverride().getResources()) {
-        if (entry != null && entry.getValue() > 0) {
-          resource.setResourceInformation(entry.getName(), entry);
-        }
-      }
-    }
-    return resource;
-  }
-}
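
The toResource() helper removed above defined the merge semantics this change
drops: start from the named profile's Resource (falling back to "default" for
an empty name) and let any override entry with a value greater than zero
replace that resource type. A worked example using the figures from the class
Javadoc, followed by the core of the merge loop (the profiles map and the
override variable are placeholders):

    // profile "small" -> <4096 MB, 2 vcores, 1 gpu>
    // override        -> <8192 MB, 0 vcores, 0 gpu>
    // merged result   -> <8192 MB, 2 vcores, 1 gpu>  (zeros keep the profile value)
    Resource merged = Resource.newInstance(profiles.get("small"));
    for (ResourceInformation ri : override.getResources()) {
      if (ri != null && ri.getValue() > 0) {
        merged.setResourceInformation(ri.getName(), ri);
      }
    }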

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
index e46647a..eea81fe 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.api.records;
 import java.io.Serializable;
 
 import org.apache.hadoop.classification.InterfaceAudience.Public;
-import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
@@ -99,21 +98,7 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
         .resourceName(hostName).capability(capability)
         .numContainers(numContainers).relaxLocality(relaxLocality)
         .nodeLabelExpression(labelExpression)
-        .executionTypeRequest(executionTypeRequest).profileCapability(null)
-        .build();
-  }
-
-  @Public
-  @Unstable
-  public static ResourceRequest newInstance(Priority priority, String hostName,
-      Resource capability, int numContainers, boolean relaxLocality,
-      String labelExpression, ExecutionTypeRequest executionTypeRequest,
-      ProfileCapability profile) {
-    return ResourceRequest.newBuilder().priority(priority)
-        .resourceName(hostName).capability(capability)
-        .numContainers(numContainers).relaxLocality(relaxLocality)
-        .nodeLabelExpression(labelExpression)
-        .executionTypeRequest(executionTypeRequest).profileCapability(profile)
+        .executionTypeRequest(executionTypeRequest)
         .build();
   }
 
@@ -140,7 +125,6 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
       resourceRequest.setRelaxLocality(true);
       resourceRequest.setExecutionTypeRequest(
           ExecutionTypeRequest.newInstance());
-      resourceRequest.setProfileCapability(null);
     }
 
     /**
@@ -271,21 +255,6 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
     }
 
     /**
-     * Set the <code>resourceProfile</code> of the request.
-     * @see ResourceRequest#setProfileCapability(ProfileCapability)
-     * @param profileCapability
-     *          <code>profileCapability</code> of the request
-     * @return {@link ResourceRequestBuilder}
-     */
-    @Public
-    @InterfaceStability.Unstable
-    public ResourceRequestBuilder profileCapability(
-        ProfileCapability profileCapability) {
-      resourceRequest.setProfileCapability(profileCapability);
-      return this;
-    }
-
-    /**
      * Return generated {@link ResourceRequest} object.
      * @return {@link ResourceRequest}
      */
@@ -502,14 +471,6 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
   @Evolving
   public abstract void setNodeLabelExpression(String nodelabelExpression);
 
-  @Public
-  @InterfaceStability.Unstable
-  public abstract ProfileCapability getProfileCapability();
-
-  @Public
-  @InterfaceStability.Unstable
-  public abstract void setProfileCapability(ProfileCapability p);
-
   /**
    * Get the optional <em>ID</em> corresponding to this allocation request. This
    * ID is an identifier for different {@code ResourceRequest}s from the <b>same
@@ -585,14 +546,12 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
     Resource capability = getCapability();
     String hostName = getResourceName();
     Priority priority = getPriority();
-    ProfileCapability profile = getProfileCapability();
     result =
         prime * result + ((capability == null) ? 0 : capability.hashCode());
     result = prime * result + ((hostName == null) ? 0 : hostName.hashCode());
     result = prime * result + getNumContainers();
     result = prime * result + ((priority == null) ? 0 : priority.hashCode());
     result = prime * result + Long.valueOf(getAllocationRequestId()).hashCode();
-    result = prime * result + ((profile == null) ? 0 : profile.hashCode());
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
index 25c8569..d573638 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
@@ -384,11 +384,6 @@ enum ExecutionTypeProto {
 ////////////////////////////////////////////////////////////////////////
 ////// From AM_RM_Protocol /////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
-message ProfileCapabilityProto {
-  required string profile = 1;
-  required ResourceProto profileCapabilityOverride = 2;
-}
-
 message ResourceRequestProto {
   optional PriorityProto priority = 1;
   optional string resource_name = 2;
@@ -398,7 +393,6 @@ message ResourceRequestProto {
   optional string node_label_expression = 6;
   optional ExecutionTypeRequestProto execution_type_request = 7;
   optional int64 allocation_request_id = 8 [default = -1];
-  optional ProfileCapabilityProto profile = 9;
 }
 
 message ExecutionTypeRequestProto {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
index a06ee7c..b2e3f41 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
@@ -89,7 +89,6 @@ import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.RejectedSchedulingRequest;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
@@ -1422,10 +1421,11 @@ public class ApplicationMaster {
     Priority pri = Priority.newInstance(requestPriority);
 
     // Set up resource type requirements
-    ContainerRequest request =
-        new ContainerRequest(createProfileCapability(), null, null,
-            pri, 0, true, null,
-            ExecutionTypeRequest.newInstance(containerType));
+    ContainerRequest request = new ContainerRequest(
+        getTaskResourceCapability(),
+        null, null, pri, 0, true, null,
+        ExecutionTypeRequest.newInstance(containerType),
+        containerResourceProfile);
     LOG.info("Requested container ask: " + request.toString());
     return request;
   }
@@ -1437,7 +1437,7 @@ public class ApplicationMaster {
         ExecutionTypeRequest.newInstance(),
         Collections.singleton(spec.sourceTag),
         ResourceSizing.newInstance(
-            createProfileCapability().getProfileCapabilityOverride()), null);
+            getTaskResourceCapability()), null);
     sReq.setPlacementConstraint(spec.constraint);
     LOG.info("Scheduling Request made: " + sReq.toString());
     return sReq;
@@ -1702,7 +1702,7 @@ public class ApplicationMaster {
     }
   }
 
-  private ProfileCapability createProfileCapability()
+  private Resource getTaskResourceCapability()
       throws YarnRuntimeException {
     if (containerMemory < -1 || containerMemory == 0) {
       throw new YarnRuntimeException("Value of AM memory '" + containerMemory
@@ -1727,12 +1727,6 @@ public class ApplicationMaster {
       resourceCapability.setResourceValue(entry.getKey(), entry.getValue());
     }
 
-    String profileName = containerResourceProfile;
-    if ("".equals(containerResourceProfile) && resourceProfiles != null) {
-      profileName = "default";
-    }
-    ProfileCapability capability =
-        ProfileCapability.newInstance(profileName, resourceCapability);
-    return capability;
+    return resourceCapability;
   }
 }
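
The AM now sizes its asks from a plain Resource: the same capability feeds the
ContainerRequest above and the SchedulingRequest built through ResourceSizing.
A short sketch of that SchedulingRequest shape (the tag, priority, request id
and constraint variable are placeholders):

    Resource cap = Resource.newInstance(1024, 1);
    SchedulingRequest sReq = SchedulingRequest.newInstance(
        0L,                                  // allocation request id
        Priority.newInstance(0),
        ExecutionTypeRequest.newInstance(),
        Collections.singleton("some-tag"),   // allocation tag
        ResourceSizing.newInstance(cap),
        null);                               // constraint attached below
    sReq.setPlacementConstraint(constraint);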

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
index ac58662..06f0fd2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
@@ -68,7 +68,6 @@ import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.QueueACL;
 import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
@@ -1123,10 +1122,17 @@ public class Client {
           + " application master, exiting. " +
           "Specified virtual cores=" + amVCores);
     }
-    String tmp = amResourceProfile;
-    if (amResourceProfile.isEmpty()) {
-      tmp = "default";
+    Resource capability = Resource.newInstance(0, 0);
+
+    if (!amResourceProfile.isEmpty()) {
+      if (!profiles.containsKey(amResourceProfile)) {
+        throw new IllegalArgumentException(
+            "Failed to find specified resource profile for application master="
+                + amResourceProfile);
+      }
+      capability = Resources.clone(profiles.get(amResourceProfile));
     }
+
     if (appContext.getAMContainerResourceRequests() == null) {
       List<ResourceRequest> amResourceRequests = new ArrayList<ResourceRequest>();
       amResourceRequests
@@ -1135,31 +1141,26 @@ public class Client {
       appContext.setAMContainerResourceRequests(amResourceRequests);
     }
 
-    if (appContext.getAMContainerResourceRequests().get(0)
-        .getProfileCapability() == null) {
-      appContext.getAMContainerResourceRequests().get(0).setProfileCapability(
-          ProfileCapability.newInstance(tmp, Resource.newInstance(0, 0)));
-    }
-
-    Resource capability = Resource.newInstance(0, 0);
-
     validateResourceTypes(amResources.keySet(), resourceTypes);
     for (Map.Entry<String, Long> entry : amResources.entrySet()) {
       capability.setResourceValue(entry.getKey(), entry.getValue());
     }
     // set amMemory because it's used to set Xmx param
     if (amMemory == -1) {
-      amMemory = (profiles == null) ? DEFAULT_AM_MEMORY :
-          profiles.get(tmp).getMemorySize();
+      amMemory = DEFAULT_AM_MEMORY;
+      LOG.warn("AM Memory not specified, use " + DEFAULT_AM_MEMORY
+          + " mb as AM memory");
     }
     if (amVCores == -1) {
-      amVCores = (profiles == null) ? DEFAULT_AM_VCORES :
-          profiles.get(tmp).getVirtualCores();
+      amVCores = DEFAULT_AM_VCORES;
+      LOG.warn("AM vcore not specified, use " + DEFAULT_AM_VCORES
+          + " mb as AM vcores");
     }
     capability.setMemorySize(amMemory);
     capability.setVirtualCores(amVCores);
-    appContext.getAMContainerResourceRequests().get(0).getProfileCapability()
-        .setProfileCapabilityOverride(capability);
+    appContext.getAMContainerResourceRequests().get(0).setCapability(
+        capability);
+    LOG.warn("AM Resource capability=" + capability);
   }
 
   private void setContainerResources(Map<String, Resource> profiles,

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java
index 914a146..32aa21d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java
@@ -25,6 +25,7 @@ import java.util.Set;
 import java.util.function.Supplier;
 import java.util.List;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
@@ -39,7 +40,6 @@ import org.apache.hadoop.yarn.api.records.ExecutionType;
 import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.SchedulingRequest;
 import org.apache.hadoop.yarn.api.records.UpdateContainerRequest;
@@ -123,7 +123,7 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
     private String nodeLabelsExpression;
     private ExecutionTypeRequest executionTypeRequest =
         ExecutionTypeRequest.newInstance();
-    private String resourceProfile = ProfileCapability.DEFAULT_PROFILE;
+    private String resourceProfile = null;
 
     /**
      * Instantiates a {@link ContainerRequest} with the given constraints and
@@ -146,6 +146,13 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
       this(capability, nodes, racks, priority, true, null);
     }
 
+    @VisibleForTesting
+    public ContainerRequest(Resource capability, String[] nodes, String[] racks,
+        Priority priority, String profile) {
+      this(capability, nodes, racks, priority, 0, true, null,
+          ExecutionTypeRequest.newInstance(), profile);
+    }
+
     /**
      * Instantiates a {@link ContainerRequest} with the given constraints and
      * locality relaxation enabled.
@@ -170,27 +177,7 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
       this(capability, nodes, racks, priority, allocationRequestId, true, null,
           ExecutionTypeRequest.newInstance());
     }
-    /**
-     * Instantiates a {@link ContainerRequest} with the given constraints and
-     * locality relaxation enabled.
-     *
-     * @param capability
-     *          The {@link ProfileCapability} to be requested for each container.
-     * @param nodes
-     *          Any hosts to request that the containers are placed on.
-     * @param racks
-     *          Any racks to request that the containers are placed on. The
-     *          racks corresponding to any hosts requested will be automatically
-     *          added to this list.
-     * @param priority
-     *          The priority at which to request the containers. Higher
-     *          priorities have lower numerical values.
-     */
-    public ContainerRequest(ProfileCapability capability, String[] nodes,
-        String[] racks, Priority priority) {
-      this(capability, nodes, racks, priority, 0, true, null);
-    }
-    
+
     /**
      * Instantiates a {@link ContainerRequest} with the given constraints.
      * 
@@ -218,29 +205,6 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
      * Instantiates a {@link ContainerRequest} with the given constraints.
      *
      * @param capability
-     *          The {@link ProfileCapability} to be requested for each container.
-     * @param nodes
-     *          Any hosts to request that the containers are placed on.
-     * @param racks
-     *          Any racks to request that the containers are placed on. The
-     *          racks corresponding to any hosts requested will be automatically
-     *          added to this list.
-     * @param priority
-     *          The priority at which to request the containers. Higher
-     *          priorities have lower numerical values.
-     * @param relaxLocality
-     *          If true, containers for this request may be assigned on hosts
-     *          and racks other than the ones explicitly requested.
-     */
-    public ContainerRequest(ProfileCapability capability, String[] nodes,
-        String[] racks, Priority priority, boolean relaxLocality) {
-      this(capability, nodes, racks, priority, 0, relaxLocality, null);
-    }
-
-    /**
-     * Instantiates a {@link ContainerRequest} with the given constraints.
-     *
-     * @param capability
      *          The {@link Resource} to be requested for each container.
      * @param nodes
      *          Any hosts to request that the containers are placed on.
@@ -328,14 +292,6 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
           ExecutionTypeRequest.newInstance());
     }
 
-    public ContainerRequest(ProfileCapability capability, String[] nodes,
-        String[] racks, Priority priority, long allocationRequestId,
-        boolean relaxLocality, String nodeLabelsExpression) {
-      this(capability, nodes, racks, priority, allocationRequestId,
-          relaxLocality, nodeLabelsExpression,
-          ExecutionTypeRequest.newInstance());
-    }
-
     /**
      * Instantiates a {@link ContainerRequest} with the given constraints.
      *
@@ -369,18 +325,9 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
         ExecutionTypeRequest executionTypeRequest) {
       this(capability, nodes, racks, priority, allocationRequestId,
           relaxLocality, nodeLabelsExpression, executionTypeRequest,
-          ProfileCapability.DEFAULT_PROFILE);
+          null);
     }
 
-    public ContainerRequest(ProfileCapability capability, String[] nodes,
-        String[] racks, Priority priority, long allocationRequestId,
-        boolean relaxLocality, String nodeLabelsExpression,
-        ExecutionTypeRequest executionTypeRequest) {
-      this(capability.getProfileCapabilityOverride(), nodes, racks, priority,
-          allocationRequestId, relaxLocality, nodeLabelsExpression,
-          executionTypeRequest, capability.getProfileName());
-    }
-          
     /**
      * Instantiates a {@link ContainerRequest} with the given constraints.
      * 
@@ -779,7 +726,7 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
   @InterfaceStability.Evolving
   public List<? extends Collection<T>> getMatchingRequests(
       Priority priority, String resourceName, ExecutionType executionType,
-      ProfileCapability capability) {
+      Resource capability, String profile) {
     throw new UnsupportedOperationException("The sub-class extending" +
         " AMRMClient is expected to implement this !!");
   }
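
ContainerRequest now carries an optional resource-profile name next to the
Resource instead of a ProfileCapability, and the profile defaults to null
rather than "default". A minimal sketch of the long-form constructor, mirroring
the distributed shell AM change above (capability and profile values are
placeholders):

    ContainerRequest cr = new ContainerRequest(
        Resource.newInstance(2048, 2),       // capability
        null, null,                          // nodes, racks
        Priority.newInstance(1),
        0,                                   // allocation request id
        true,                                // relax locality
        null,                                // node label expression
        ExecutionTypeRequest.newInstance(),
        "small");                            // resource profile, may be null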

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
index 8e2336f..a8e4dfc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.client.api.impl;
 
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -58,9 +59,9 @@ import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.NMToken;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.SchedulingRequest;
 import org.apache.hadoop.yarn.api.records.Token;
@@ -124,14 +125,11 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
     LinkedHashSet<T> containerRequests;
 
     ResourceRequestInfo(Long allocationRequestId, Priority priority,
-        String resourceName, Resource capability, boolean relaxLocality,
-        String resourceProfile) {
-      ProfileCapability profileCapability = ProfileCapability
-          .newInstance(resourceProfile, capability);
+        String resourceName, Resource capability, boolean relaxLocality) {
       remoteRequest = ResourceRequest.newBuilder().priority(priority)
           .resourceName(resourceName).capability(capability).numContainers(0)
           .allocationRequestId(allocationRequestId).relaxLocality(relaxLocality)
-          .profileCapability(profileCapability).build();
+          .build();
       containerRequests = new LinkedHashSet<T>();
     }
   }
@@ -140,34 +138,13 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
    * Class compares Resource by memory, then cpu and then the remaining resource
    * types in reverse order.
    */
-  static class ProfileCapabilityComparator<T extends ProfileCapability>
-      implements Comparator<T> {
-
-    HashMap<String, Resource> resourceProfilesMap;
-
-    public ProfileCapabilityComparator(
-        HashMap<String, Resource> resourceProfileMap) {
-      this.resourceProfilesMap = resourceProfileMap;
-    }
-
-    public int compare(T arg0, T arg1) {
-      Resource resource0 =
-          ProfileCapability.toResource(arg0, resourceProfilesMap);
-      Resource resource1 =
-          ProfileCapability.toResource(arg1, resourceProfilesMap);
-      return resource1.compareTo(resource0);
+  static class ResourceReverseComparator<T extends Resource>
+      implements Comparator<T>, Serializable {
+    public int compare(Resource res0, Resource res1) {
+      return res1.compareTo(res0);
     }
   }
 
-  boolean canFit(ProfileCapability arg0, ProfileCapability arg1) {
-    Resource resource0 =
-        ProfileCapability.toResource(arg0, resourceProfilesMap);
-    Resource resource1 =
-        ProfileCapability.toResource(arg1, resourceProfilesMap);
-    return Resources.fitsIn(resource0, resource1);
-
-  }
-
   private final Map<Long, RemoteRequestsTable<T>> remoteRequests =
       new HashMap<>();
 
@@ -567,7 +544,7 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
               .nodeLabelExpression(r.getNodeLabelExpression())
               .executionTypeRequest(r.getExecutionTypeRequest())
               .allocationRequestId(r.getAllocationRequestId())
-              .profileCapability(r.getProfileCapability()).build();
+              .build();
       askList.add(rr);
     }
     return askList;
@@ -649,8 +626,6 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
   public synchronized void addContainerRequest(T req) {
     Preconditions.checkArgument(req != null,
         "Resource request can not be null.");
-    ProfileCapability profileCapability = ProfileCapability
-        .newInstance(req.getResourceProfile(), req.getCapability());
     Set<String> dedupedRacks = new HashSet<String>();
     if (req.getRacks() != null) {
       dedupedRacks.addAll(req.getRacks());
@@ -663,7 +638,8 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
     Set<String> inferredRacks = resolveRacks(req.getNodes());
     inferredRacks.removeAll(dedupedRacks);
 
-    checkResourceProfile(req.getResourceProfile());
+    Resource resource = checkAndGetResourceProfile(req.getResourceProfile(),
+        req.getCapability());
 
     // check that specific and non-specific requests cannot be mixed within a
     // priority
@@ -689,26 +665,26 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
       }
       for (String node : dedupedNodes) {
         addResourceRequest(req.getPriority(), node,
-            req.getExecutionTypeRequest(), profileCapability, req, true,
+            req.getExecutionTypeRequest(), resource, req, true,
             req.getNodeLabelExpression());
       }
     }
 
     for (String rack : dedupedRacks) {
       addResourceRequest(req.getPriority(), rack, req.getExecutionTypeRequest(),
-          profileCapability, req, true, req.getNodeLabelExpression());
+          resource, req, true, req.getNodeLabelExpression());
     }
 
     // Ensure node requests are accompanied by requests for
     // corresponding rack
     for (String rack : inferredRacks) {
       addResourceRequest(req.getPriority(), rack, req.getExecutionTypeRequest(),
-          profileCapability, req, req.getRelaxLocality(),
+          resource, req, req.getRelaxLocality(),
           req.getNodeLabelExpression());
     }
     // Off-switch
     addResourceRequest(req.getPriority(), ResourceRequest.ANY,
-        req.getExecutionTypeRequest(), profileCapability, req,
+        req.getExecutionTypeRequest(), resource, req,
         req.getRelaxLocality(), req.getNodeLabelExpression());
   }
 
@@ -716,8 +692,8 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
   public synchronized void removeContainerRequest(T req) {
     Preconditions.checkArgument(req != null,
         "Resource request can not be null.");
-    ProfileCapability profileCapability = ProfileCapability
-        .newInstance(req.getResourceProfile(), req.getCapability());
+    Resource resource = checkAndGetResourceProfile(req.getResourceProfile(),
+        req.getCapability());
     Set<String> allRacks = new HashSet<String>();
     if (req.getRacks() != null) {
       allRacks.addAll(req.getRacks());
@@ -728,17 +704,17 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
     if (req.getNodes() != null) {
       for (String node : new HashSet<String>(req.getNodes())) {
         decResourceRequest(req.getPriority(), node,
-            req.getExecutionTypeRequest(), profileCapability, req);
+            req.getExecutionTypeRequest(), resource, req);
       }
     }
 
     for (String rack : allRacks) {
       decResourceRequest(req.getPriority(), rack,
-          req.getExecutionTypeRequest(), profileCapability, req);
+          req.getExecutionTypeRequest(), resource, req);
     }
 
     decResourceRequest(req.getPriority(), ResourceRequest.ANY,
-        req.getExecutionTypeRequest(), profileCapability, req);
+        req.getExecutionTypeRequest(), resource, req);
   }
 
   @Override
@@ -833,26 +809,23 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
   }
 
   @Override
-  @SuppressWarnings("unchecked")
-  public synchronized List<? extends Collection<T>> getMatchingRequests(
-      Priority priority, String resourceName, ExecutionType executionType,
-      Resource capability) {
-    ProfileCapability profileCapability =
-        ProfileCapability.newInstance(capability);
-    return getMatchingRequests(priority, resourceName, executionType,
-        profileCapability);
+  public List<? extends Collection<T>> getMatchingRequests(Priority priority,
+      String resourceName, ExecutionType executionType,
+      Resource capability, String profile) {
+    capability = checkAndGetResourceProfile(profile, capability);
+    return getMatchingRequests(priority, resourceName, executionType, capability);
   }
 
   @Override
   @SuppressWarnings("unchecked")
   public synchronized List<? extends Collection<T>> getMatchingRequests(
       Priority priority, String resourceName, ExecutionType executionType,
-      ProfileCapability capability) {
+      Resource capability) {
     Preconditions.checkArgument(capability != null,
         "The Resource to be requested should not be null ");
     Preconditions.checkArgument(priority != null,
         "The priority at which to request containers should not be null ");
-    List<LinkedHashSet<T>> list = new LinkedList<LinkedHashSet<T>>();
+    List<LinkedHashSet<T>> list = new LinkedList<>();
 
     RemoteRequestsTable remoteRequestsTable = getTable(0);
 
@@ -864,7 +837,7 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
         // If no exact match. Container may be larger than what was requested.
         // get all resources <= capability. map is reverse sorted.
         for (ResourceRequestInfo<T> resReqInfo : matchingRequests) {
-          if (canFit(resReqInfo.remoteRequest.getProfileCapability(),
+          if (Resources.fitsIn(resReqInfo.remoteRequest.getCapability(),
               capability) && !resReqInfo.containerRequests.isEmpty()) {
             list.add(resReqInfo.containerRequests);
           }
@@ -921,13 +894,34 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
     }
   }
 
-  private void checkResourceProfile(String profile) {
-    if (resourceProfilesMap != null && !resourceProfilesMap.isEmpty()
-        && !resourceProfilesMap.containsKey(profile)) {
-      throw new InvalidContainerRequestException(
-          "Invalid profile name, valid profile names are " + resourceProfilesMap
-              .keySet());
+  // When profile and override resource are specified at the same time, override
+  // predefined resource value in profile if any resource type has a positive
+  // value.
+  private Resource checkAndGetResourceProfile(String profile,
+      Resource overrideResource) {
+    Resource returnResource = overrideResource;
+
+    // if the application requested a non-empty/non-null profile, validate it
+    if (profile != null && !profile.isEmpty()) {
+      if (resourceProfilesMap == null || (!resourceProfilesMap.containsKey(
+          profile))) {
+        throw new InvalidContainerRequestException(
+            "Invalid profile name specified=" + profile + (
+                resourceProfilesMap == null ?
+                    "" :
+                    (", valid profile names are " + resourceProfilesMap
+                        .keySet())));
+      }
+      returnResource = Resources.clone(resourceProfilesMap.get(profile));
+      for (ResourceInformation info : overrideResource
+          .getAllResourcesListCopy()) {
+        if (info.getValue() > 0) {
+          returnResource.setResourceInformation(info.getName(), info);
+        }
+      }
     }
+
+    return returnResource;
   }
   
   /**
@@ -1016,16 +1010,12 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
   }
 
   private void addResourceRequest(Priority priority, String resourceName,
-      ExecutionTypeRequest execTypeReq, ProfileCapability capability, T req,
+      ExecutionTypeRequest execTypeReq, Resource capability, T req,
       boolean relaxLocality, String labelExpression) {
     RemoteRequestsTable<T> remoteRequestsTable =
         getTable(req.getAllocationRequestId());
     if (remoteRequestsTable == null) {
-      remoteRequestsTable = new RemoteRequestsTable<T>();
-      if (this.resourceProfilesMap instanceof HashMap) {
-        remoteRequestsTable.setResourceComparator(
-            new ProfileCapabilityComparator((HashMap) resourceProfilesMap));
-      }
+      remoteRequestsTable = new RemoteRequestsTable<>();
       putTable(req.getAllocationRequestId(), remoteRequestsTable);
     }
     @SuppressWarnings("unchecked")
@@ -1048,7 +1038,7 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
   }
 
   private void decResourceRequest(Priority priority, String resourceName,
-      ExecutionTypeRequest execTypeReq, ProfileCapability capability, T req) {
+      ExecutionTypeRequest execTypeReq, Resource capability, T req) {
     RemoteRequestsTable<T> remoteRequestsTable =
         getTable(req.getAllocationRequestId());
     if (remoteRequestsTable != null) {
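
A minimal, self-contained model of the merge rule that checkAndGetResourceProfile()
implements above: the profile's predefined values form the base, and any resource
type with a positive value in the per-request override replaces the profile value.
Resource types are flattened to a name-to-value map purely for illustration; the
real code works on Resource and ResourceInformation objects, and the class name is
made up.

import java.util.HashMap;
import java.util.Map;

public class ProfileMergeSketch {
  static Map<String, Long> merge(Map<String, Long> profile,
                                 Map<String, Long> override) {
    Map<String, Long> result = new HashMap<>(profile);   // stands in for Resources.clone()
    for (Map.Entry<String, Long> e : override.entrySet()) {
      if (e.getValue() > 0) {                             // only positive overrides win
        result.put(e.getKey(), e.getValue());
      }
    }
    return result;
  }

  public static void main(String[] args) {
    Map<String, Long> profile = new HashMap<>();
    profile.put("memory-mb", 4096L);
    profile.put("vcores", 2L);

    Map<String, Long> override = new HashMap<>();
    override.put("memory-mb", 8192L);   // positive: overrides the profile value
    override.put("vcores", 0L);         // zero: the profile value is kept

    // Prints memory-mb=8192 and vcores=2.
    System.out.println(merge(profile, override));
  }
}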

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java
index 5527132..e1b7bb2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java
@@ -21,7 +21,7 @@ package org.apache.hadoop.yarn.client.api.impl;
 import org.apache.hadoop.yarn.api.records.ExecutionType;
 import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
+import org.apache.hadoop.yarn.api.records.Resource;
 
 import java.util.Collection;
 import java.util.HashMap;
@@ -33,7 +33,6 @@ import java.util.TreeMap;
 
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.ResourceRequestInfo;
-import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.ProfileCapabilityComparator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -42,36 +41,34 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   private static final Logger LOG =
           LoggerFactory.getLogger(RemoteRequestsTable.class);
 
-  private ProfileCapabilityComparator resourceComparator;
-
   /**
    * Nested Iterator that iterates over just the ResourceRequestInfo
    * object.
    */
   class RequestInfoIterator implements Iterator<ResourceRequestInfo> {
-    private Iterator<Map<String, Map<ExecutionType, TreeMap<ProfileCapability,
+    private Iterator<Map<String, Map<ExecutionType, TreeMap<Resource,
         ResourceRequestInfo>>>> iLocMap;
-    private Iterator<Map<ExecutionType, TreeMap<ProfileCapability,
+    private Iterator<Map<ExecutionType, TreeMap<Resource,
         ResourceRequestInfo>>> iExecTypeMap;
-    private Iterator<TreeMap<ProfileCapability, ResourceRequestInfo>> iCapMap;
+    private Iterator<TreeMap<Resource, ResourceRequestInfo>> iCapMap;
     private Iterator<ResourceRequestInfo> iResReqInfo;
 
     public RequestInfoIterator(Iterator<Map<String,
-        Map<ExecutionType, TreeMap<ProfileCapability, ResourceRequestInfo>>>>
+        Map<ExecutionType, TreeMap<Resource, ResourceRequestInfo>>>>
         iLocationMap) {
       this.iLocMap = iLocationMap;
       if (iLocMap.hasNext()) {
         iExecTypeMap = iLocMap.next().values().iterator();
       } else {
         iExecTypeMap =
-            new LinkedList<Map<ExecutionType, TreeMap<ProfileCapability,
+            new LinkedList<Map<ExecutionType, TreeMap<Resource,
                 ResourceRequestInfo>>>().iterator();
       }
       if (iExecTypeMap.hasNext()) {
         iCapMap = iExecTypeMap.next().values().iterator();
       } else {
         iCapMap =
-            new LinkedList<TreeMap<ProfileCapability, ResourceRequestInfo>>()
+            new LinkedList<TreeMap<Resource, ResourceRequestInfo>>()
                 .iterator();
       }
       if (iCapMap.hasNext()) {
@@ -113,7 +110,7 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   // Nest map with Primary key :
   // Priority -> ResourceName(String) -> ExecutionType -> Capability(Resource)
   // and value : ResourceRequestInfo
-  private Map<Priority, Map<String, Map<ExecutionType, TreeMap<ProfileCapability,
+  private Map<Priority, Map<String, Map<ExecutionType, TreeMap<Resource,
       ResourceRequestInfo>>>> remoteRequestsTable = new HashMap<>();
 
   @Override
@@ -122,8 +119,8 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   }
 
   ResourceRequestInfo get(Priority priority, String location,
-      ExecutionType execType, ProfileCapability capability) {
-    TreeMap<ProfileCapability, ResourceRequestInfo> capabilityMap =
+      ExecutionType execType, Resource capability) {
+    TreeMap<Resource, ResourceRequestInfo> capabilityMap =
         getCapabilityMap(priority, location, execType);
     if (capabilityMap == null) {
       return null;
@@ -133,8 +130,8 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
 
   @SuppressWarnings("unchecked")
   void put(Priority priority, String resourceName, ExecutionType execType,
-      ProfileCapability capability, ResourceRequestInfo resReqInfo) {
-    Map<String, Map<ExecutionType, TreeMap<ProfileCapability,
+      Resource capability, ResourceRequestInfo resReqInfo) {
+    Map<String, Map<ExecutionType, TreeMap<Resource,
         ResourceRequestInfo>>> locationMap =
         remoteRequestsTable.get(priority);
     if (locationMap == null) {
@@ -144,7 +141,7 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
         LOG.debug("Added priority=" + priority);
       }
     }
-    Map<ExecutionType, TreeMap<ProfileCapability, ResourceRequestInfo>>
+    Map<ExecutionType, TreeMap<Resource, ResourceRequestInfo>>
         execTypeMap = locationMap.get(resourceName);
     if (execTypeMap == null) {
       execTypeMap = new HashMap<>();
@@ -153,15 +150,10 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
         LOG.debug("Added resourceName=" + resourceName);
       }
     }
-    TreeMap<ProfileCapability, ResourceRequestInfo> capabilityMap =
+    TreeMap<Resource, ResourceRequestInfo> capabilityMap =
         execTypeMap.get(execType);
     if (capabilityMap == null) {
-      // this can happen if the user doesn't register with the RM before
-      // calling addResourceRequest
-      if (resourceComparator == null) {
-        resourceComparator = new ProfileCapabilityComparator(new HashMap<>());
-      }
-      capabilityMap = new TreeMap<>(resourceComparator);
+      capabilityMap = new TreeMap<>(new AMRMClientImpl.ResourceReverseComparator());
       execTypeMap.put(execType, capabilityMap);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Added Execution Type=" + execType);
@@ -171,9 +163,9 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   }
 
   ResourceRequestInfo remove(Priority priority, String resourceName,
-      ExecutionType execType, ProfileCapability capability) {
+      ExecutionType execType, Resource capability) {
     ResourceRequestInfo retVal = null;
-    Map<String, Map<ExecutionType, TreeMap<ProfileCapability,
+    Map<String, Map<ExecutionType, TreeMap<Resource,
         ResourceRequestInfo>>> locationMap = remoteRequestsTable.get(priority);
     if (locationMap == null) {
       if (LOG.isDebugEnabled()) {
@@ -181,7 +173,7 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
       }
       return null;
     }
-    Map<ExecutionType, TreeMap<ProfileCapability, ResourceRequestInfo>>
+    Map<ExecutionType, TreeMap<Resource, ResourceRequestInfo>>
         execTypeMap = locationMap.get(resourceName);
     if (execTypeMap == null) {
       if (LOG.isDebugEnabled()) {
@@ -189,7 +181,7 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
       }
       return null;
     }
-    TreeMap<ProfileCapability, ResourceRequestInfo> capabilityMap =
+    TreeMap<Resource, ResourceRequestInfo> capabilityMap =
         execTypeMap.get(execType);
     if (capabilityMap == null) {
       if (LOG.isDebugEnabled()) {
@@ -210,14 +202,14 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
     return retVal;
   }
 
-  Map<String, Map<ExecutionType, TreeMap<ProfileCapability,
+  Map<String, Map<ExecutionType, TreeMap<Resource,
       ResourceRequestInfo>>> getLocationMap(Priority priority) {
     return remoteRequestsTable.get(priority);
   }
 
-  Map<ExecutionType, TreeMap<ProfileCapability, ResourceRequestInfo>>
+  Map<ExecutionType, TreeMap<Resource, ResourceRequestInfo>>
       getExecutionTypeMap(Priority priority, String location) {
-    Map<String, Map<ExecutionType, TreeMap<ProfileCapability,
+    Map<String, Map<ExecutionType, TreeMap<Resource,
         ResourceRequestInfo>>> locationMap = getLocationMap(priority);
     if (locationMap == null) {
       return null;
@@ -225,10 +217,10 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
     return locationMap.get(location);
   }
 
-  TreeMap<ProfileCapability, ResourceRequestInfo> getCapabilityMap(Priority
+  TreeMap<Resource, ResourceRequestInfo> getCapabilityMap(Priority
       priority, String location,
       ExecutionType execType) {
-    Map<ExecutionType, TreeMap<ProfileCapability, ResourceRequestInfo>>
+    Map<ExecutionType, TreeMap<Resource, ResourceRequestInfo>>
         executionTypeMap = getExecutionTypeMap(priority, location);
     if (executionTypeMap == null) {
       return null;
@@ -242,7 +234,7 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
     List retList = new LinkedList<>();
     for (String location : locations) {
       for (ExecutionType eType : ExecutionType.values()) {
-        TreeMap<ProfileCapability, ResourceRequestInfo> capabilityMap =
+        TreeMap<Resource, ResourceRequestInfo> capabilityMap =
             getCapabilityMap(priority, location, eType);
         if (capabilityMap != null) {
           retList.addAll(capabilityMap.values());
@@ -254,9 +246,9 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
 
   List<ResourceRequestInfo> getMatchingRequests(
       Priority priority, String resourceName, ExecutionType executionType,
-      ProfileCapability capability) {
+      Resource capability) {
     List<ResourceRequestInfo> list = new LinkedList<>();
-    TreeMap<ProfileCapability, ResourceRequestInfo> capabilityMap =
+    TreeMap<Resource, ResourceRequestInfo> capabilityMap =
         getCapabilityMap(priority, resourceName, executionType);
     if (capabilityMap != null) {
       ResourceRequestInfo resourceRequestInfo = capabilityMap.get(capability);
@@ -272,15 +264,14 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   @SuppressWarnings("unchecked")
   ResourceRequestInfo addResourceRequest(Long allocationRequestId,
       Priority priority, String resourceName, ExecutionTypeRequest execTypeReq,
-      ProfileCapability capability, T req, boolean relaxLocality,
+      Resource capability, T req, boolean relaxLocality,
       String labelExpression) {
     ResourceRequestInfo resourceRequestInfo =
         get(priority, resourceName, execTypeReq.getExecutionType(), capability);
     if (resourceRequestInfo == null) {
       resourceRequestInfo =
           new ResourceRequestInfo(allocationRequestId, priority, resourceName,
-              capability.getProfileCapabilityOverride(), relaxLocality,
-              capability.getProfileName());
+              capability, relaxLocality);
       put(priority, resourceName, execTypeReq.getExecutionType(), capability,
           resourceRequestInfo);
     }
@@ -302,7 +293,7 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   }
 
   ResourceRequestInfo decResourceRequest(Priority priority, String resourceName,
-      ExecutionTypeRequest execTypeReq, ProfileCapability capability, T req) {
+      ExecutionTypeRequest execTypeReq, Resource capability, T req) {
     ResourceRequestInfo resourceRequestInfo = get(priority, resourceName,
         execTypeReq.getExecutionType(), capability);
 
@@ -339,35 +330,4 @@ class RemoteRequestsTable<T> implements Iterable<ResourceRequestInfo>{
   boolean isEmpty() {
     return remoteRequestsTable.isEmpty();
   }
-
-  @SuppressWarnings("unchecked")
-  public void setResourceComparator(ProfileCapabilityComparator comparator) {
-    ProfileCapabilityComparator old = this.resourceComparator;
-    this.resourceComparator = comparator;
-    if (old != null) {
-      // we've already set a resource comparator - re-create the maps with the
-      // new one. this is needed in case someone adds container requests before
-      // registering with the RM. In such a case, the comparator won't have
-      // the resource profiles map. After registration, the map is available
-      // so re-create the capabilities maps
-
-      for (Map.Entry<Priority, Map<String, Map<ExecutionType,
-          TreeMap<ProfileCapability, ResourceRequestInfo>>>>
-          priEntry : remoteRequestsTable.entrySet()) {
-        for (Map.Entry<String, Map<ExecutionType, TreeMap<ProfileCapability,
-            ResourceRequestInfo>>> nameEntry : priEntry.getValue().entrySet()) {
-          for (Map.Entry<ExecutionType, TreeMap<ProfileCapability,
-              ResourceRequestInfo>> execEntry : nameEntry
-              .getValue().entrySet()) {
-            Map<ProfileCapability, ResourceRequestInfo> capabilityMap =
-                execEntry.getValue();
-            TreeMap<ProfileCapability, ResourceRequestInfo> newCapabiltyMap =
-                new TreeMap<>(resourceComparator);
-            newCapabiltyMap.putAll(capabilityMap);
-            execEntry.setValue(newCapabiltyMap);
-          }
-        }
-      }
-    }
-  }
 }
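
A small sketch of the data-structure change in this file, under simplifying
assumptions: the innermost capability map is now keyed by Resource and ordered by
the new reverse comparator, so iteration sees the largest capability first. A bare
memory value stands in for a full Resource here, and the class name is made up.

import java.util.Comparator;
import java.util.TreeMap;

public class CapabilityMapSketch {
  public static void main(String[] args) {
    // Analogue of AMRMClientImpl.ResourceReverseComparator: natural order, reversed.
    TreeMap<Long, String> capabilityMap = new TreeMap<>(Comparator.reverseOrder());
    capabilityMap.put(1024L, "request-A");
    capabilityMap.put(4096L, "request-B");
    capabilityMap.put(2048L, "request-C");

    // Iteration starts from the largest capability, which is what lets
    // getMatchingRequests() collect stored requests that still fit inside the
    // asked-for size when there is no exact match.
    capabilityMap.forEach((mem, name) -> System.out.println(mem + "MB -> " + name));
    // 4096MB -> request-B, 2048MB -> request-C, 1024MB -> request-A
  }
}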

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
index b059118..8dda8b4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
@@ -470,19 +470,16 @@ public class TestAMRMClient extends BaseAMRMClientTest{
       amClient.addContainerRequest(storedContainer1);
       amClient.addContainerRequest(storedContainer2);
       amClient.addContainerRequest(storedContainer3);
-
-      ProfileCapability profileCapability =
-          ProfileCapability.newInstance(capability);
       
       // test addition and storage
       RemoteRequestsTable<ContainerRequest> remoteRequestsTable =
           amClient.getTable(0);
       int containersRequestedAny = remoteRequestsTable.get(priority,
-          ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability)
+          ResourceRequest.ANY, ExecutionType.GUARANTEED, capability)
           .remoteRequest.getNumContainers();
       assertEquals(2, containersRequestedAny);
       containersRequestedAny = remoteRequestsTable.get(priority1,
-          ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability)
+          ResourceRequest.ANY, ExecutionType.GUARANTEED, capability)
           .remoteRequest.getNumContainers();
          assertEquals(1, containersRequestedAny);
       List<? extends Collection<ContainerRequest>> matches = 
@@ -1185,11 +1182,9 @@ public class TestAMRMClient extends BaseAMRMClientTest{
             true, null, ExecutionTypeRequest
             .newInstance(ExecutionType.OPPORTUNISTIC, true)));
 
-    ProfileCapability profileCapability =
-          ProfileCapability.newInstance(capability);
     int oppContainersRequestedAny =
         amClient.getTable(0).get(priority2, ResourceRequest.ANY,
-            ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest
+            ExecutionType.OPPORTUNISTIC, capability).remoteRequest
             .getNumContainers();
 
     assertEquals(1, oppContainersRequestedAny);
@@ -1326,11 +1321,9 @@ public class TestAMRMClient extends BaseAMRMClientTest{
             true, null, ExecutionTypeRequest
             .newInstance(ExecutionType.GUARANTEED, true)));
 
-    ProfileCapability profileCapability =
-        ProfileCapability.newInstance(capability);
     int oppContainersRequestedAny =
         amClient.getTable(0).get(priority2, ResourceRequest.ANY,
-            ExecutionType.GUARANTEED, profileCapability).remoteRequest
+            ExecutionType.GUARANTEED, capability).remoteRequest
             .getNumContainers();
 
     assertEquals(1, oppContainersRequestedAny);
@@ -1710,16 +1703,14 @@ public class TestAMRMClient extends BaseAMRMClientTest{
       int expAsks, int expRelease) {
     RemoteRequestsTable<ContainerRequest> remoteRequestsTable =
         amClient.getTable(allocationReqId);
-    ProfileCapability profileCapability =
-        ProfileCapability.newInstance(capability);
     int containersRequestedNode = remoteRequestsTable.get(priority,
-        node, ExecutionType.GUARANTEED, profileCapability).remoteRequest
+        node, ExecutionType.GUARANTEED, capability).remoteRequest
         .getNumContainers();
     int containersRequestedRack = remoteRequestsTable.get(priority,
-        rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest
+        rack, ExecutionType.GUARANTEED, capability).remoteRequest
         .getNumContainers();
     int containersRequestedAny = remoteRequestsTable.get(priority,
-        ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability)
+        ResourceRequest.ANY, ExecutionType.GUARANTEED, capability)
         .remoteRequest.getNumContainers();
 
     assertEquals(expNode, containersRequestedNode);
@@ -1931,31 +1922,20 @@ public class TestAMRMClient extends BaseAMRMClientTest{
       amClient.start();
       amClient.registerApplicationMaster("Host", 10000, "");
 
-      ProfileCapability capability1 = ProfileCapability.newInstance("minimum");
-      ProfileCapability capability2 = ProfileCapability.newInstance("default");
-      ProfileCapability capability3 = ProfileCapability.newInstance("maximum");
-      ProfileCapability capability4 = ProfileCapability
-          .newInstance("minimum", Resource.newInstance(2048, 1));
-      ProfileCapability capability5 = ProfileCapability.newInstance("default");
-      ProfileCapability capability6 = ProfileCapability
-          .newInstance("default", Resource.newInstance(2048, 1));
-      // http has the same capabilities as default
-      ProfileCapability capability7 = ProfileCapability.newInstance("http");
-
-      ContainerRequest storedContainer1 =
-          new ContainerRequest(capability1, nodes, racks, priority);
-      ContainerRequest storedContainer2 =
-          new ContainerRequest(capability2, nodes, racks, priority);
-      ContainerRequest storedContainer3 =
-          new ContainerRequest(capability3, nodes, racks, priority);
-      ContainerRequest storedContainer4 =
-          new ContainerRequest(capability4, nodes, racks, priority);
-      ContainerRequest storedContainer5 =
-          new ContainerRequest(capability5, nodes, racks, priority2);
-      ContainerRequest storedContainer6 =
-          new ContainerRequest(capability6, nodes, racks, priority);
-      ContainerRequest storedContainer7 =
-          new ContainerRequest(capability7, nodes, racks, priority);
+      ContainerRequest storedContainer1 = new ContainerRequest(
+          Resource.newInstance(0, 0), nodes, racks, priority, "minimum");
+      ContainerRequest storedContainer2 = new ContainerRequest(
+          Resource.newInstance(0, 0), nodes, racks, priority, "default");
+      ContainerRequest storedContainer3 = new ContainerRequest(
+          Resource.newInstance(0, 0), nodes, racks, priority, "maximum");
+      ContainerRequest storedContainer4 = new ContainerRequest(
+          Resource.newInstance(2048, 1), nodes, racks, priority, "minimum");
+      ContainerRequest storedContainer5 = new ContainerRequest(
+          Resource.newInstance(2048, 1), nodes, racks, priority2, "default");
+      ContainerRequest storedContainer6 = new ContainerRequest(
+          Resource.newInstance(2048, 1), nodes, racks, priority, "default");
+      ContainerRequest storedContainer7 = new ContainerRequest(
+          Resource.newInstance(0, 0), nodes, racks, priority, "http");
 
 
       amClient.addContainerRequest(storedContainer1);
@@ -1970,11 +1950,8 @@ public class TestAMRMClient extends BaseAMRMClientTest{
       List<? extends Collection<ContainerRequest>> matches;
       ContainerRequest storedRequest;
       // exact match
-      ProfileCapability testCapability1 =
-          ProfileCapability.newInstance("minimum");
-      matches = amClient
-          .getMatchingRequests(priority, node, ExecutionType.GUARANTEED,
-              testCapability1);
+      matches = amClient.getMatchingRequests(priority, node,
+          ExecutionType.GUARANTEED, Resource.newInstance(0, 0), "minimum");
       verifyMatches(matches, 1);
       storedRequest = matches.get(0).iterator().next();
       assertEquals(storedContainer1, storedRequest);
@@ -1983,11 +1960,9 @@ public class TestAMRMClient extends BaseAMRMClientTest{
       // exact matching with order maintained
       // we should get back 3 matches - default + http because they have the
       // same capability
-      ProfileCapability testCapability2 =
-          ProfileCapability.newInstance("default");
       matches = amClient
           .getMatchingRequests(priority, node, ExecutionType.GUARANTEED,
-              testCapability2);
+              Resource.newInstance(0, 0), "default");
       verifyMatches(matches, 2);
       // must be returned in the order they were made
       int i = 0;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java
index c87123a..595c3f7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java
@@ -29,7 +29,6 @@ import org.apache.hadoop.net.DNSToSwitchMapping;
 import org.apache.hadoop.yarn.api.records.ExecutionType;
 import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.client.api.AMRMClient;
@@ -277,10 +276,8 @@ public class TestAMRMClientContainerRequest {
       AMRMClientImpl<ContainerRequest> client, ContainerRequest request,
       String location, boolean expectedRelaxLocality,
       ExecutionType executionType) {
-    ProfileCapability profileCapability = ProfileCapability
-        .newInstance(request.getResourceProfile(), request.getCapability());
     ResourceRequest ask = client.getTable(0).get(request.getPriority(),
-        location, executionType, profileCapability).remoteRequest;
+        location, executionType, request.getCapability()).remoteRequest;
     assertEquals(location, ask.getResourceName());
     assertEquals(1, ask.getNumContainers());
     assertEquals(expectedRelaxLocality, ask.getRelaxLocality());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
index 23e5572..eb2ecb9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
@@ -18,22 +18,6 @@
 
 package org.apache.hadoop.yarn.client.api.impl;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.security.Credentials;
@@ -58,7 +42,6 @@ import org.apache.hadoop.yarn.api.records.NMToken;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
@@ -84,6 +67,22 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
 public class TestNMClient {
   Configuration conf = null;
   MiniYARNCluster yarnCluster = null;
@@ -332,11 +331,9 @@ public class TestNMClient {
           racks, priority));
     }
 
-    ProfileCapability profileCapability =
-        ProfileCapability.newInstance(capability);
     int containersRequestedAny = rmClient.getTable(0)
         .get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED,
-            profileCapability).remoteRequest.getNumContainers();
+            capability).remoteRequest.getNumContainers();
 
     // RM should allocate container within 2 calls to allocate()
     int allocatedContainerCount = 0;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocationE2E.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocationE2E.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocationE2E.java
index 94cb28e..753c00b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocationE2E.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocationE2E.java
@@ -42,7 +42,6 @@ import org.apache.hadoop.yarn.api.records.NMToken;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.Token;
@@ -100,7 +99,6 @@ public class TestOpportunisticContainerAllocationE2E {
   private static final long AM_EXPIRE_MS = 4000;
 
   private static Resource capability;
-  private static ProfileCapability profileCapability;
   private static Priority priority;
   private static Priority priority2;
   private static Priority priority3;
@@ -153,7 +151,6 @@ public class TestOpportunisticContainerAllocationE2E {
     priority3 = Priority.newInstance(3);
     priority4 = Priority.newInstance(4);
     capability = Resource.newInstance(512, 1);
-    profileCapability = ProfileCapability.newInstance(capability);
 
     node = nodeReports.get(0).getNodeId().getHost();
     rack = nodeReports.get(0).getRackName();
@@ -276,7 +273,7 @@ public class TestOpportunisticContainerAllocationE2E {
 
     int oppContainersRequestedAny =
         amClient.getTable(0).get(priority2, ResourceRequest.ANY,
-            ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest
+            ExecutionType.OPPORTUNISTIC, capability).remoteRequest
             .getNumContainers();
 
     assertEquals(1, oppContainersRequestedAny);
@@ -397,7 +394,7 @@ public class TestOpportunisticContainerAllocationE2E {
         new AMRMClient.ContainerRequest(capability, null, null, priority3));
 
     int guarContainersRequestedAny = amClient.getTable(0).get(priority3,
-        ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability)
+        ResourceRequest.ANY, ExecutionType.GUARANTEED, capability)
         .remoteRequest.getNumContainers();
 
     assertEquals(1, guarContainersRequestedAny);
@@ -536,17 +533,17 @@ public class TestOpportunisticContainerAllocationE2E {
                 ExecutionType.OPPORTUNISTIC, true)));
 
     int containersRequestedNode = amClient.getTable(0).get(priority,
-        node, ExecutionType.GUARANTEED, profileCapability).remoteRequest
+        node, ExecutionType.GUARANTEED, capability).remoteRequest
         .getNumContainers();
     int containersRequestedRack = amClient.getTable(0).get(priority,
-        rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest
+        rack, ExecutionType.GUARANTEED, capability).remoteRequest
         .getNumContainers();
     int containersRequestedAny = amClient.getTable(0).get(priority,
-        ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability)
+        ResourceRequest.ANY, ExecutionType.GUARANTEED, capability)
         .remoteRequest.getNumContainers();
     int oppContainersRequestedAny =
         amClient.getTable(0).get(priority2, ResourceRequest.ANY,
-            ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest
+            ExecutionType.OPPORTUNISTIC, capability).remoteRequest
             .getNumContainers();
 
     assertEquals(4, containersRequestedNode);
@@ -568,17 +565,17 @@ public class TestOpportunisticContainerAllocationE2E {
                 ExecutionType.OPPORTUNISTIC, true)));
 
     containersRequestedNode = amClient.getTable(0).get(priority,
-        node, ExecutionType.GUARANTEED, profileCapability).remoteRequest
+        node, ExecutionType.GUARANTEED, capability).remoteRequest
         .getNumContainers();
     containersRequestedRack = amClient.getTable(0).get(priority,
-        rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest
+        rack, ExecutionType.GUARANTEED, capability).remoteRequest
         .getNumContainers();
     containersRequestedAny = amClient.getTable(0).get(priority,
-        ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability)
+        ResourceRequest.ANY, ExecutionType.GUARANTEED, capability)
         .remoteRequest.getNumContainers();
     oppContainersRequestedAny =
         amClient.getTable(0).get(priority2, ResourceRequest.ANY,
-            ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest
+            ExecutionType.OPPORTUNISTIC, capability).remoteRequest
             .getNumContainers();
 
     assertEquals(2, containersRequestedNode);
@@ -697,7 +694,7 @@ public class TestOpportunisticContainerAllocationE2E {
 
     int oppContainersRequestedAny = amClient.getTable(0)
         .get(priority3, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC,
-            profileCapability).remoteRequest.getNumContainers();
+            capability).remoteRequest.getNumContainers();
 
     assertEquals(2, oppContainersRequestedAny);
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java
deleted file mode 100644
index cfe1e8c..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.api.records.impl.pb;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.proto.YarnProtos;
-import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProtoOrBuilder;
-import org.apache.hadoop.yarn.util.resource.Resources;
-
-/**
- * Protobuf implementation for the ProfileCapability class.
- */
-@InterfaceAudience.Private
-@InterfaceStability.Unstable
-public class ProfileCapabilityPBImpl extends ProfileCapability {
-
-  private ProfileCapabilityProto proto =
-      ProfileCapabilityProto.getDefaultInstance();
-  private ProfileCapabilityProto.Builder builder;
-
-  private boolean viaProto;
-
-  private String profile;
-  private Resource profileCapabilityOverride;
-
-  public ProfileCapabilityPBImpl() {
-    builder = ProfileCapabilityProto.newBuilder();
-  }
-
-  public ProfileCapabilityPBImpl(ProfileCapabilityProto proto) {
-    this.proto = proto;
-    viaProto = true;
-  }
-
-  @Override
-  public String getProfileName() {
-    if (profile != null) {
-      return profile;
-    }
-    ProfileCapabilityProtoOrBuilder p = viaProto ? proto : builder;
-    if (p.hasProfile()) {
-      profile = p.getProfile();
-    }
-    return profile;
-  }
-
-  @Override
-  public Resource getProfileCapabilityOverride() {
-    if (profileCapabilityOverride != null) {
-      return profileCapabilityOverride;
-    }
-    ProfileCapabilityProtoOrBuilder p = viaProto ? proto : builder;
-    if (p.hasProfileCapabilityOverride()) {
-      profileCapabilityOverride =
-          Resources.clone(new ResourcePBImpl(p.getProfileCapabilityOverride()));
-    }
-    return profileCapabilityOverride;
-  }
-
-  @Override
-  public void setProfileName(String profileName) {
-    this.profile = profileName;
-  }
-
-  @Override
-  public void setProfileCapabilityOverride(Resource r) {
-    this.profileCapabilityOverride = r;
-  }
-
-  public ProfileCapabilityProto getProto() {
-    mergeLocalToProto();
-    proto = viaProto ? proto : builder.build();
-    viaProto = true;
-    return proto;
-  }
-
-  private void mergeLocalToProto() {
-    if (viaProto) {
-      maybeInitBuilder();
-    }
-    mergeLocalToBuilder();
-    proto = builder.build();
-    viaProto = true;
-  }
-
-  private void mergeLocalToBuilder() {
-    if (profile != null) {
-      builder.setProfile(profile);
-    }
-    if (profileCapabilityOverride != null) {
-      builder.setProfileCapabilityOverride(
-          convertToProtoFormat(profileCapabilityOverride));
-    }
-  }
-
-  private void maybeInitBuilder() {
-    if (viaProto || builder == null) {
-      builder = ProfileCapabilityProto.newBuilder(proto);
-    }
-    viaProto = false;
-  }
-
-  private YarnProtos.ResourceProto convertToProtoFormat(Resource res) {
-    return ProtoUtils.convertToProtoFormat(res);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java
index 3c29645..698800d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java
@@ -23,10 +23,8 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ResourceRequestProto;
@@ -42,8 +40,7 @@ public class ResourceRequestPBImpl extends  ResourceRequest {
   private Priority priority = null;
   private Resource capability = null;
   private ExecutionTypeRequest executionTypeRequest = null;
-  private ProfileCapability profile = null;
-  
+
   
   public ResourceRequestPBImpl() {
     builder = ResourceRequestProto.newBuilder();
@@ -72,9 +69,6 @@ public class ResourceRequestPBImpl extends  ResourceRequest {
       builder.setExecutionTypeRequest(
           ProtoUtils.convertToProtoFormat(this.executionTypeRequest));
     }
-    if (this.profile != null) {
-      builder.setProfile(converToProtoFormat(this.profile));
-    }
   }
 
   private void mergeLocalToProto() {
@@ -236,7 +230,7 @@ public class ResourceRequestPBImpl extends  ResourceRequest {
         + ", Relax Locality: " + getRelaxLocality()
         + ", Execution Type Request: " + getExecutionTypeRequest()
         + ", Node Label Expression: " + getNodeLabelExpression()
-        + ", Resource Profile: " + getProfileCapability() + "}";
+        + "}";
   }
 
   @Override
@@ -257,34 +251,4 @@ public class ResourceRequestPBImpl extends  ResourceRequest {
     }
     builder.setNodeLabelExpression(nodeLabelExpression);
   }
-
-  @Override
-  public void setProfileCapability(ProfileCapability profileCapability) {
-    maybeInitBuilder();
-    if (profile == null) {
-      builder.clearProfile();
-    }
-    this.profile = profileCapability;
-  }
-
-  @Override
-  public ProfileCapability getProfileCapability() {
-    if (profile != null) {
-      return profile;
-    }
-    ResourceRequestProtoOrBuilder p = viaProto ? proto : builder;
-    if (!p.hasProfile()) {
-      return null;
-    }
-    return new ProfileCapabilityPBImpl(p.getProfile());
-  }
-
-  private ProfileCapabilityProto converToProtoFormat(
-      ProfileCapability profileCapability) {
-    ProfileCapabilityPBImpl tmp = new ProfileCapabilityPBImpl();
-    tmp.setProfileName(profileCapability.getProfileName());
-    tmp.setProfileCapabilityOverride(
-        profileCapability.getProfileCapabilityOverride());
-    return tmp.getProto();
-  }
 }
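
With the profile field gone from ResourceRequestPBImpl, a ResourceRequest is built
from a plain Resource capability alone. A hypothetical sketch using only the builder
calls that already appear in AMRMClientImpl above (the sizes are illustrative):

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;

public class ResourceRequestSketch {
  public static void main(String[] args) {
    ResourceRequest rr = ResourceRequest.newBuilder()
        .priority(Priority.newInstance(1))
        .resourceName(ResourceRequest.ANY)         // off-switch request
        .capability(Resource.newInstance(2048, 1))
        .numContainers(1)
        .relaxLocality(true)
        .build();                                  // no profileCapability(...) step anymore
    System.out.println(rr);                        // toString() no longer prints a profile
  }
}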

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
index ae80910..c9a3b22 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
@@ -132,7 +132,6 @@ import org.apache.hadoop.yarn.api.records.PreemptionContract;
 import org.apache.hadoop.yarn.api.records.PreemptionMessage;
 import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.QueueConfigurations;
 import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.QueueState;
@@ -185,7 +184,6 @@ import org.apache.hadoop.yarn.api.records.impl.pb.PreemptionContractPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.PreemptionMessagePBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.PreemptionResourceRequestPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.PriorityPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.ProfileCapabilityPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.QueueInfoPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.QueueUserACLInfoPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.ResourceBlacklistRequestPBImpl;
@@ -322,7 +320,6 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseP
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesResponseProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileRequestProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileResponseProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProto;
 import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.AddToClusterNodeLabelsRequestPBImpl;
 import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.AddToClusterNodeLabelsResponsePBImpl;
 import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.CheckForDecommissioningNodesRequestPBImpl;
@@ -384,7 +381,6 @@ public class TestPBImplRecords extends BasePBImplRecordsTest {
     generateByNewInstance(NodeReport.class);
     generateByNewInstance(Token.class);
     generateByNewInstance(NMToken.class);
-    generateByNewInstance(ProfileCapability.class);
     generateByNewInstance(ResourceRequest.class);
     generateByNewInstance(ApplicationAttemptReport.class);
     generateByNewInstance(ApplicationResourceUsageReport.class);
@@ -1216,12 +1212,6 @@ public class TestPBImplRecords extends BasePBImplRecordsTest {
   }
 
   @Test
-  public void testProfileCapabilityPBImpl() throws Exception {
-    validatePBImplRecord(ProfileCapabilityPBImpl.class,
-        ProfileCapabilityProto.class);
-  }
-
-  @Test
   public void testResourceTypesInfoPBImpl() throws Exception {
     validatePBImplRecord(ResourceTypeInfoPBImpl.class,
         YarnProtos.ResourceTypeInfoProto.class);




[20/21] hadoop git commit: HDFS-13058. Fix dfs.namenode.shared.edits.dir in TestJournalNode. Contributed by Bharat Viswanadham.

Posted by ha...@apache.org.
HDFS-13058. Fix dfs.namenode.shared.edits.dir in TestJournalNode. Contributed by Bharat Viswanadham.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4c2119f0
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4c2119f0
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4c2119f0

Branch: refs/heads/HDFS-12996
Commit: 4c2119f04ee29784090bf121fd995dea78e542a3
Parents: 8d5ea74
Author: Arpit Agarwal <ar...@apache.org>
Authored: Fri Feb 16 10:29:22 2018 -0800
Committer: Arpit Agarwal <ar...@apache.org>
Committed: Fri Feb 16 10:29:22 2018 -0800

----------------------------------------------------------------------
 .../hdfs/qjournal/server/TestJournalNode.java   | 26 ++++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4c2119f0/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java
index 581218d..8d58792 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java
@@ -83,6 +83,7 @@ public class TestJournalNode {
     File editsDir = new File(MiniDFSCluster.getBaseDirectory() +
         File.separator + "TestJournalNode");
     FileUtil.fullyDelete(editsDir);
+    journalId = "test-journalid-" + GenericTestUtils.uniqueSequenceId();
 
     if (testName.getMethodName().equals("testJournalDirPerNameSpace")) {
       setFederationConf();
@@ -113,33 +114,33 @@ public class TestJournalNode {
       conf.setBoolean(DFSConfigKeys.DFS_JOURNALNODE_ENABLE_SYNC_KEY,
           false);
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
-          "qjournal://jn0:9900;jn1:9901");
+          "qjournal://jn0:9900;jn1:9901/" + journalId);
     } else if (testName.getMethodName().equals(
         "testJournalNodeSyncerNotStartWhenSyncEnabledIncorrectURI")) {
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
-          "qjournal://journal0\\:9900;journal1:9901");
+          "qjournal://journal0\\:9900;journal1:9901/" + journalId);
     } else if (testName.getMethodName().equals(
         "testJournalNodeSyncerNotStartWhenSyncEnabled")) {
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
-          "qjournal://jn0:9900;jn1:9901");
+          "qjournal://jn0:9900;jn1:9901/" + journalId);
     } else if (testName.getMethodName().equals(
         "testJournalNodeSyncwithFederationTypeConfigWithNameServiceId")) {
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1",
-          "qjournal://journalnode0:9900;journalnode0:9901");
+          "qjournal://journalnode0:9900;journalnode0:9901/" + journalId);
     } else if (testName.getMethodName().equals(
         "testJournalNodeSyncwithFederationTypeConfigWithNamenodeId")) {
       conf.set(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + ".ns1", "nn1,nn2");
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1" +".nn1",
-          "qjournal://journalnode0:9900;journalnode1:9901");
+          "qjournal://journalnode0:9900;journalnode1:9901/" +journalId);
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1" +".nn2",
-          "qjournal://journalnode0:9900;journalnode1:9901");
+          "qjournal://journalnode0:9900;journalnode1:9901/" +journalId);
     } else if (testName.getMethodName().equals(
         "testJournalNodeSyncwithFederationTypeIncorrectConfigWithNamenodeId")) {
       conf.set(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + ".ns1", "nn1,nn2");
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1" +".nn1",
-          "qjournal://journalnode0:9900;journalnode1:9901");
+          "qjournal://journalnode0:9900;journalnode1:9901/" + journalId);
       conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1" +".nn2",
-          "qjournal://journalnode0:9902;journalnode1:9903");
+          "qjournal://journalnode0:9902;journalnode1:9903/" + journalId);
     }
     jn = new JournalNode();
     jn.setConf(conf);
@@ -161,7 +162,6 @@ public class TestJournalNode {
         journal.format(fakeNameSpaceInfo);
       }
     } else {
-      journalId = "test-journalid-" + GenericTestUtils.uniqueSequenceId();
       journal = jn.getOrCreateJournal(journalId);
       journal.format(FAKE_NSINFO);
     }
@@ -176,16 +176,16 @@ public class TestJournalNode {
     //ns1
     conf.set(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + ".ns1", "nn1,nn2");
     conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1" +".nn1",
-        "qjournal://journalnode0:9900;journalnode1:9901/ns1");
+        "qjournal://journalnode0:9900;journalnode1:9901/test-journalid-ns1");
     conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1" +".nn2",
-        "qjournal://journalnode0:9900;journalnode1:9901/ns2");
+        "qjournal://journalnode0:9900;journalnode1:9901/test-journalid-ns1");
 
     //ns2
     conf.set(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + ".ns2", "nn3,nn4");
     conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns2" +".nn3",
-        "qjournal://journalnode0:9900;journalnode1:9901/ns2");
+        "qjournal://journalnode0:9900;journalnode1:9901/test-journalid-ns2");
     conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns2" +".nn4",
-        "qjournal://journalnode0:9900;journalnode1:9901/ns2");
+        "qjournal://journalnode0:9900;journalnode1:9901/test-journalid-ns2");
   }
   
   @After
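
For readers skimming the diff: the fix appends an explicit journal id to every qjournal URI the test configures, so each test method (and each namespace) addresses its own journal. A minimal sketch of the resulting setting, where the jn0/jn1 hosts follow the test values and "my-journal-id" is a hypothetical id, not something mandated by the patch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    // Sketch only: the shared-edits URI now ends with a journal id.
    Configuration conf = new Configuration();
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        "qjournal://jn0:9900;jn1:9901/my-journal-id");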




[05/21] hadoop git commit: HADOOP-15176. Enhance IAM Assumed Role support in S3A client. Contributed by Steve Loughran

Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAssumeRole.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAssumeRole.java
deleted file mode 100644
index 7c8760b..0000000
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAssumeRole.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.s3a;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.nio.file.AccessDeniedException;
-import java.util.concurrent.Callable;
-
-import com.amazonaws.auth.AWSCredentials;
-import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
-import static org.apache.hadoop.test.LambdaTestUtils.intercept;
-
-/**
- * Tests use of assumed roles.
- * Only run if an assumed role is provided.
- */
-public class ITestAssumeRole extends AbstractS3ATestBase {
-
-  private static final Logger LOG =
-      LoggerFactory.getLogger(ITestAssumeRole.class);
-
-  private static final String ARN_EXAMPLE
-      = "arn:aws:kms:eu-west-1:00000000000:key/" +
-      "0000000-16c9-4832-a1a9-c8bbef25ec8b";
-
-  private static final String E_BAD_ROLE
-      = "Not authorized to perform sts:AssumeRole";
-
-  /**
-   * This is AWS policy removes read access.
-   */
-  public static final String RESTRICTED_POLICY = "{\n"
-      + "   \"Version\": \"2012-10-17\",\n"
-      + "   \"Statement\": [{\n"
-      + "      \"Effect\": \"Deny\",\n"
-      + "      \"Action\": \"s3:ListObjects\",\n"
-      + "      \"Resource\": \"*\"\n"
-      + "    }\n"
-      + "   ]\n"
-      + "}";
-
-  private void assumeRoleTests() {
-    assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
-  }
-
-  private String getAssumedRoleARN() {
-    return getContract().getConf().getTrimmed(ASSUMED_ROLE_ARN, "");
-  }
-
-  /**
-   * Expect a filesystem to fail to instantiate.
-   * @param conf config to use
-   * @param clazz class of exception to expect
-   * @param text text in exception
-   * @param <E> type of exception as inferred from clazz
-   * @throws Exception if the exception was the wrong class
-   */
-  private <E extends Throwable> void expectFileSystemFailure(
-      Configuration conf,
-      Class<E> clazz,
-      String text) throws Exception {
-    interceptC(clazz,
-        text,
-        () -> new Path(getFileSystem().getUri()).getFileSystem(conf));
-  }
-
-  /**
-   * Experimental variant of intercept() which closes any Closeable
-   * returned.
-   */
-  private static <E extends Throwable> E interceptC(
-      Class<E> clazz, String text,
-      Callable<Closeable> eval)
-      throws Exception {
-
-    return intercept(clazz, text,
-        () -> {
-          try (Closeable c = eval.call()) {
-            return c.toString();
-          }
-        });
-  }
-
-  @Test
-  public void testCreateCredentialProvider() throws IOException {
-    assumeRoleTests();
-    describe("Create the credential provider");
-
-    String roleARN = getAssumedRoleARN();
-
-    Configuration conf = new Configuration(getContract().getConf());
-    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
-    conf.set(ASSUMED_ROLE_ARN, roleARN);
-    conf.set(ASSUMED_ROLE_SESSION_NAME, "valid");
-    conf.set(ASSUMED_ROLE_SESSION_DURATION, "45m");
-    conf.set(ASSUMED_ROLE_POLICY, RESTRICTED_POLICY);
-    try (AssumedRoleCredentialProvider provider
-             = new AssumedRoleCredentialProvider(conf)) {
-      LOG.info("Provider is {}", provider);
-      AWSCredentials credentials = provider.getCredentials();
-      assertNotNull("Null credentials from " + provider, credentials);
-    }
-  }
-
-  @Test
-  public void testAssumeRoleCreateFS() throws IOException {
-    assumeRoleTests();
-    describe("Create an FS client with the role and do some basic IO");
-
-    String roleARN = getAssumedRoleARN();
-    Configuration conf = createAssumedRoleConfig(roleARN);
-    conf.set(ASSUMED_ROLE_SESSION_NAME, "valid");
-    conf.set(ASSUMED_ROLE_SESSION_DURATION, "45m");
-    Path path = new Path(getFileSystem().getUri());
-    LOG.info("Creating test FS and user {} with assumed role {}",
-        conf.get(ACCESS_KEY), roleARN);
-
-    try (FileSystem fs = path.getFileSystem(conf)) {
-      fs.getFileStatus(new Path("/"));
-      fs.mkdirs(path("testAssumeRoleFS"));
-    }
-  }
-
-  @Test
-  public void testAssumeRoleRestrictedPolicyFS() throws Exception {
-    assumeRoleTests();
-    describe("Restrict the policy for this session; verify that reads fail");
-
-    String roleARN = getAssumedRoleARN();
-    Configuration conf = createAssumedRoleConfig(roleARN);
-    conf.set(ASSUMED_ROLE_POLICY, RESTRICTED_POLICY);
-    Path path = new Path(getFileSystem().getUri());
-    try (FileSystem fs = path.getFileSystem(conf)) {
-      intercept(AccessDeniedException.class, "getFileStatus",
-          () -> fs.getFileStatus(new Path("/")));
-      intercept(AccessDeniedException.class, "getFileStatus",
-          () -> fs.listStatus(new Path("/")));
-      intercept(AccessDeniedException.class, "getFileStatus",
-          () -> fs.mkdirs(path("testAssumeRoleFS")));
-    }
-  }
-
-  @Test
-  public void testAssumeRoleFSBadARN() throws Exception {
-    assumeRoleTests();
-    describe("Attemnpt to create the FS with an invalid ARN");
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    conf.set(ASSUMED_ROLE_ARN, ARN_EXAMPLE);
-    expectFileSystemFailure(conf, AccessDeniedException.class, E_BAD_ROLE);
-  }
-
-  @Test
-  public void testAssumeRoleNoARN() throws Exception {
-    assumeRoleTests();
-    describe("Attemnpt to create the FS with no ARN");
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    conf.unset(ASSUMED_ROLE_ARN);
-    expectFileSystemFailure(conf,
-        IOException.class,
-        AssumedRoleCredentialProvider.E_NO_ROLE);
-  }
-
-  @Test
-  public void testAssumeRoleFSBadPolicy() throws Exception {
-    assumeRoleTests();
-    describe("Attemnpt to create the FS with malformed JSON");
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    // add some malformed JSON
-    conf.set(ASSUMED_ROLE_POLICY, "}");
-    expectFileSystemFailure(conf,
-        AWSBadRequestException.class,
-        "JSON");
-  }
-
-  @Test
-  public void testAssumeRoleFSBadPolicy2() throws Exception {
-    assumeRoleTests();
-    describe("Attemnpt to create the FS with valid but non-compliant JSON");
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    // add some invalid JSON
-    conf.set(ASSUMED_ROLE_POLICY, "{'json':'but not what AWS wants}");
-    expectFileSystemFailure(conf,
-        AWSBadRequestException.class,
-        "Syntax errors in policy");
-  }
-
-  @Test
-  public void testAssumeRoleCannotAuthAssumedRole() throws Exception {
-    assumeRoleTests();
-    describe("Assert that you can't use assumed roles to auth assumed roles");
-
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
-        AssumedRoleCredentialProvider.NAME);
-    expectFileSystemFailure(conf,
-        IOException.class,
-        AssumedRoleCredentialProvider.E_FORBIDDEN_PROVIDER);
-  }
-
-  @Test
-  public void testAssumeRoleBadInnerAuth() throws Exception {
-    assumeRoleTests();
-    describe("Try to authenticate with a keypair with spaces");
-
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
-        SimpleAWSCredentialsProvider.NAME);
-    conf.set(ACCESS_KEY, "not valid");
-    conf.set(SECRET_KEY, "not secret");
-    expectFileSystemFailure(conf, AWSBadRequestException.class, "not a valid " +
-        "key=value pair (missing equal-sign) in Authorization header");
-  }
-
-  @Test
-  public void testAssumeRoleBadInnerAuth2() throws Exception {
-    assumeRoleTests();
-    describe("Try to authenticate with an invalid keypair");
-
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
-        SimpleAWSCredentialsProvider.NAME);
-    conf.set(ACCESS_KEY, "notvalid");
-    conf.set(SECRET_KEY, "notsecret");
-    expectFileSystemFailure(conf, AccessDeniedException.class,
-        "The security token included in the request is invalid");
-  }
-
-  @Test
-  public void testAssumeRoleBadSession() throws Exception {
-    assumeRoleTests();
-    describe("Try to authenticate with an invalid session");
-
-    Configuration conf = createAssumedRoleConfig(getAssumedRoleARN());
-    conf.set(ASSUMED_ROLE_SESSION_NAME,
-        "Session Names cannot Hava Spaces!");
-    expectFileSystemFailure(conf, AWSBadRequestException.class,
-        "Member must satisfy regular expression pattern");
-  }
-
-  /**
-   * Create a config for an assumed role; it also disables FS caching.
-   * @param roleARN ARN of role
-   * @return the configuration
-   */
-  private Configuration createAssumedRoleConfig(String roleARN) {
-    Configuration conf = new Configuration(getContract().getConf());
-    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
-    conf.set(ASSUMED_ROLE_ARN, roleARN);
-    disableFilesystemCaching(conf);
-    return conf;
-  }
-
-  @Test
-  public void testAssumedRoleCredentialProviderValidation() throws Throwable {
-    Configuration conf = new Configuration();
-    conf.set(ASSUMED_ROLE_ARN, "");
-    interceptC(IOException.class,
-        AssumedRoleCredentialProvider.E_NO_ROLE,
-        () -> new AssumedRoleCredentialProvider(conf));
-  }
-
-  @Test
-  public void testAssumedDuration() throws Throwable {
-    assumeRoleTests();
-    describe("Expect the constructor to fail if the session is to short");
-    Configuration conf = new Configuration();
-    conf.set(ASSUMED_ROLE_SESSION_DURATION, "30s");
-    interceptC(IllegalArgumentException.class, "",
-        () -> new AssumedRoleCredentialProvider(conf));
-  }
-
-  @Test
-  public void testAssumedInvalidRole() throws Throwable {
-    assumeRoleTests();
-    describe("Expect the constructor to fail if the role is invalid");
-    Configuration conf = new Configuration();
-    conf.set(ASSUMED_ROLE_ARN, ARN_EXAMPLE);
-    interceptC(AWSSecurityTokenServiceException.class,
-        E_BAD_ROLE,
-        () -> new AssumedRoleCredentialProvider(conf));
-  }
-
-  /**
-   * This is here to check up on the S3ATestUtils probes themselves.
-   * @see S3ATestUtils#authenticationContains(Configuration, String).
-   */
-  @Test
-  public void testauthenticationContainsProbes() {
-    Configuration conf = new Configuration(false);
-    assertFalse("found AssumedRoleCredentialProvider",
-        authenticationContains(conf, AssumedRoleCredentialProvider.NAME));
-
-    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
-    assertTrue("didn't find AssumedRoleCredentialProvider",
-        authenticationContains(conf, AssumedRoleCredentialProvider.NAME));
-  }
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index d6533bf..da0060e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -19,6 +19,8 @@
 package org.apache.hadoop.fs.s3a;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -39,23 +41,28 @@ import org.junit.internal.AssumptionViolatedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.List;
+import java.util.concurrent.Callable;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
 import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
 import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
 import static org.apache.hadoop.fs.s3a.Constants.*;
 import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.junit.Assert.*;
 
 /**
  * Utilities for the S3A tests.
  */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
 public final class S3ATestUtils {
   private static final Logger LOG = LoggerFactory.getLogger(
       S3ATestUtils.class);
@@ -456,6 +463,33 @@ public final class S3ATestUtils {
   }
 
   /**
+   * Variant of {@code LambdaTestUtils#intercept()} which closes the Closeable
+   * returned by the invoked operation, and using its toString() value
+   * for exception messages.
+   * @param clazz class of exception; the raised exception must be this class
+   * <i>or a subclass</i>.
+   * @param contained string which must be in the {@code toString()} value
+   * of the exception
+   * @param eval expression to eval
+   * @param <T> return type of expression
+   * @param <E> exception class
+   * @return the caught exception if it was of the expected type and contents
+   */
+  public static <E extends Throwable, T extends Closeable> E interceptClosing(
+      Class<E> clazz,
+      String contained,
+      Callable<T> eval)
+      throws Exception {
+
+    return intercept(clazz, contained,
+        () -> {
+          try (Closeable c = eval.call()) {
+            return c.toString();
+          }
+        });
+  }
+
+  /**
    * Helper class to do diffs of metrics.
    */
   public static final class MetricDiff {
@@ -762,21 +796,23 @@ public final class S3ATestUtils {
   }
 
   /**
-   * List a directory.
+   * List a directory/directory tree.
    * @param fileSystem FS
    * @param path path
+   * @param recursive do a recursive listing?
+   * @return the number of files found.
    * @throws IOException failure.
    */
-  public static void lsR(FileSystem fileSystem, Path path, boolean recursive)
+  public static long lsR(FileSystem fileSystem, Path path, boolean recursive)
       throws Exception {
     if (path == null) {
       // surfaces when someone calls getParent() on something at the top
       // of the path
       LOG.info("Empty path");
-      return;
+      return 0;
     }
-    S3AUtils.applyLocatedFiles(fileSystem.listFiles(path, recursive),
-        (status) -> LOG.info("  {}", status));
+    return S3AUtils.applyLocatedFiles(fileSystem.listFiles(path, recursive),
+        (status) -> LOG.info("{}", status));
   }
 
   /**
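
The interceptClosing() helper added above behaves like LambdaTestUtils.intercept(), but closes whatever Closeable the callable returns, so an unexpectedly successful FileSystem instantiation does not leak a client. A hedged usage sketch, where the bucket URI and the expected message fragment are illustrative rather than taken from the patch:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import static org.apache.hadoop.fs.s3a.S3ATestUtils.interceptClosing;

    // Sketch only: expect filesystem creation to fail; if it unexpectedly
    // succeeds, the returned FileSystem is closed before the failure is raised.
    Configuration conf = new Configuration();
    IOException ex = interceptClosing(IOException.class,
        "expected message fragment",
        () -> new Path("s3a://example-bucket/").getFileSystem(conf));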

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java
index a5be5de..d731ae7 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java
@@ -18,12 +18,6 @@
 
 package org.apache.hadoop.fs.s3a;
 
-import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
-import static org.apache.hadoop.fs.s3a.S3AUtils.*;
-import static org.junit.Assert.*;
-
 import java.io.IOException;
 import java.net.URI;
 import java.util.Arrays;
@@ -33,13 +27,19 @@ import com.amazonaws.auth.AWSCredentials;
 import com.amazonaws.auth.AWSCredentialsProvider;
 import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
 import com.amazonaws.auth.InstanceProfileCredentialsProvider;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider;
 
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.*;
+import static org.junit.Assert.*;
 
 /**
  * Unit tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic.
@@ -248,10 +248,10 @@ public class TestS3AAWSCredentialsProvider {
       AWSCredentialsProvider provider = providers.get(i);
       assertNotNull(
           String.format("At position %d, expected class is %s, but found null.",
-          i, expectedClass), provider);
+              i, expectedClass), provider);
       assertTrue(
           String.format("At position %d, expected class is %s, but found %s.",
-          i, expectedClass, provider.getClass()),
+              i, expectedClass, provider.getClass()),
           expectedClass.isAssignableFrom(provider.getClass()));
     }
   }
@@ -269,7 +269,23 @@ public class TestS3AAWSCredentialsProvider {
     assertNotNull(provider2);
     assertInstanceOf(InstanceProfileCredentialsProvider.class, provider2);
     assertSame("Expected all usage of InstanceProfileCredentialsProvider to "
-        + "share a singleton instance, but found unique instances.",
+            + "share a singleton instance, but found unique instances.",
         provider1, provider2);
   }
+
+  /**
+   * This is here to check up on the S3ATestUtils probes themselves.
+   * @see S3ATestUtils#authenticationContains(Configuration, String).
+   */
+  @Test
+  public void testAuthenticationContainsProbes() {
+    Configuration conf = new Configuration(false);
+    assertFalse("found AssumedRoleCredentialProvider",
+        authenticationContains(conf, AssumedRoleCredentialProvider.NAME));
+
+    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
+    assertTrue("didn't find AssumedRoleCredentialProvider",
+        authenticationContains(conf, AssumedRoleCredentialProvider.NAME));
+  }
+
 }
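
A usage note on the probe verified above: fs.s3a.aws.credentials.provider accepts an ordered, comma-separated list of credential provider classes, and authenticationContains() reports whether a given provider name appears in that configured list. A sketch under those assumptions, with the second provider in the chain chosen purely for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider;
    import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER;
    import static org.apache.hadoop.fs.s3a.S3ATestUtils.authenticationContains;

    // Sketch only: a provider chain with the assumed-role provider first.
    Configuration conf = new Configuration(false);
    conf.set(AWS_CREDENTIALS_PROVIDER,
        AssumedRoleCredentialProvider.NAME
            + ",com.amazonaws.auth.EnvironmentVariableCredentialsProvider");
    boolean usesAssumedRole =
        authenticationContains(conf, AssumedRoleCredentialProvider.NAME); // true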

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
new file mode 100644
index 0000000..08171b0
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
@@ -0,0 +1,789 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.AccessDeniedException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import com.amazonaws.auth.AWSCredentials;
+import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.AWSBadRequestException;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.MultipartUtils;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AUtils;
+import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
+import org.apache.hadoop.fs.s3a.commit.CommitConstants;
+import org.apache.hadoop.fs.s3a.commit.CommitOperations;
+import org.apache.hadoop.fs.s3a.commit.files.PendingSet;
+import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.assertRenameOutcome;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.*;
+import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.*;
+import static org.apache.hadoop.fs.s3a.auth.RoleModel.*;
+import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*;
+import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.forbidden;
+import static org.apache.hadoop.test.LambdaTestUtils.*;
+
+/**
+ * Tests use of assumed roles.
+ * Only run if an assumed role is provided.
+ */
+@SuppressWarnings({"IOResourceOpenedButNotSafelyClosed", "ThrowableNotThrown"})
+public class ITestAssumeRole extends AbstractS3ATestBase {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ITestAssumeRole.class);
+
+  private static final Path ROOT = new Path("/");
+
+  /**
+   * A role FS; if non-null it is closed in teardown.
+   */
+  private S3AFileSystem roleFS;
+
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    assumeRoleTests();
+  }
+
+  @Override
+  public void teardown() throws Exception {
+    S3AUtils.closeAll(LOG, roleFS);
+    super.teardown();
+  }
+
+  private void assumeRoleTests() {
+    assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
+  }
+
+  private String getAssumedRoleARN() {
+    return getContract().getConf().getTrimmed(ASSUMED_ROLE_ARN, "");
+  }
+
+  /**
+   * Expect a filesystem to fail to instantiate.
+   * @param conf config to use
+   * @param clazz class of exception to expect
+   * @param text text in exception
+   * @param <E> type of exception as inferred from clazz
+   * @throws Exception if the exception was the wrong class
+   */
+  private <E extends Throwable> void expectFileSystemCreateFailure(
+      Configuration conf,
+      Class<E> clazz,
+      String text) throws Exception {
+    interceptClosing(clazz,
+        text,
+        () -> new Path(getFileSystem().getUri()).getFileSystem(conf));
+  }
+
+  @Test
+  public void testCreateCredentialProvider() throws IOException {
+    describe("Create the credential provider");
+
+    String roleARN = getAssumedRoleARN();
+
+    Configuration conf = new Configuration(getContract().getConf());
+    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
+    conf.set(ASSUMED_ROLE_ARN, roleARN);
+    conf.set(ASSUMED_ROLE_SESSION_NAME, "valid");
+    conf.set(ASSUMED_ROLE_SESSION_DURATION, "45m");
+    bindRolePolicy(conf, RESTRICTED_POLICY);
+    try (AssumedRoleCredentialProvider provider
+             = new AssumedRoleCredentialProvider(conf)) {
+      LOG.info("Provider is {}", provider);
+      AWSCredentials credentials = provider.getCredentials();
+      assertNotNull("Null credentials from " + provider, credentials);
+    }
+  }
+
+  @Test
+  public void testAssumedInvalidRole() throws Throwable {
+    Configuration conf = new Configuration();
+    conf.set(ASSUMED_ROLE_ARN, ROLE_ARN_EXAMPLE);
+    interceptClosing(AWSSecurityTokenServiceException.class,
+        E_BAD_ROLE,
+        () -> new AssumedRoleCredentialProvider(conf));
+  }
+
+  @Test
+  public void testAssumeRoleFSBadARN() throws Exception {
+    describe("Attemnpt to create the FS with an invalid ARN");
+    Configuration conf = createAssumedRoleConfig();
+    conf.set(ASSUMED_ROLE_ARN, ROLE_ARN_EXAMPLE);
+    expectFileSystemCreateFailure(conf, AccessDeniedException.class,
+        E_BAD_ROLE);
+  }
+
+  @Test
+  public void testAssumeRoleNoARN() throws Exception {
+    describe("Attemnpt to create the FS with no ARN");
+    Configuration conf = createAssumedRoleConfig();
+    conf.unset(ASSUMED_ROLE_ARN);
+    expectFileSystemCreateFailure(conf,
+        IOException.class,
+        AssumedRoleCredentialProvider.E_NO_ROLE);
+  }
+
+  @Test
+  public void testAssumeRoleFSBadPolicy() throws Exception {
+    describe("Attemnpt to create the FS with malformed JSON");
+    Configuration conf = createAssumedRoleConfig();
+    // add some malformed JSON
+    conf.set(ASSUMED_ROLE_POLICY,  "}");
+    expectFileSystemCreateFailure(conf,
+        AWSBadRequestException.class,
+        "JSON");
+  }
+
+  @Test
+  public void testAssumeRoleFSBadPolicy2() throws Exception {
+    describe("Attempt to create the FS with valid but non-compliant JSON");
+    Configuration conf = createAssumedRoleConfig();
+    // add some invalid JSON
+    conf.set(ASSUMED_ROLE_POLICY, "{'json':'but not what AWS wants}");
+    expectFileSystemCreateFailure(conf,
+        AWSBadRequestException.class,
+        "Syntax errors in policy");
+  }
+
+  @Test
+  public void testAssumeRoleCannotAuthAssumedRole() throws Exception {
+    describe("Assert that you can't use assumed roles to auth assumed roles");
+
+    Configuration conf = createAssumedRoleConfig();
+    conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
+        AssumedRoleCredentialProvider.NAME);
+    expectFileSystemCreateFailure(conf,
+        IOException.class,
+        AssumedRoleCredentialProvider.E_FORBIDDEN_PROVIDER);
+  }
+
+  @Test
+  public void testAssumeRoleBadInnerAuth() throws Exception {
+    describe("Try to authenticate with a keypair with spaces");
+
+    Configuration conf = createAssumedRoleConfig();
+    conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
+        SimpleAWSCredentialsProvider.NAME);
+    conf.set(ACCESS_KEY, "not valid");
+    conf.set(SECRET_KEY, "not secret");
+    expectFileSystemCreateFailure(conf,
+        AWSBadRequestException.class,
+        "not a valid " +
+        "key=value pair (missing equal-sign) in Authorization header");
+  }
+
+  @Test
+  public void testAssumeRoleBadInnerAuth2() throws Exception {
+    describe("Try to authenticate with an invalid keypair");
+
+    Configuration conf = createAssumedRoleConfig();
+    conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
+        SimpleAWSCredentialsProvider.NAME);
+    conf.set(ACCESS_KEY, "notvalid");
+    conf.set(SECRET_KEY, "notsecret");
+    expectFileSystemCreateFailure(conf,
+        AccessDeniedException.class,
+        "The security token included in the request is invalid");
+  }
+
+  @Test
+  public void testAssumeRoleBadSession() throws Exception {
+    describe("Try to authenticate with an invalid session");
+
+    Configuration conf = createAssumedRoleConfig();
+    conf.set(ASSUMED_ROLE_SESSION_NAME,
+        "Session names cannot hava spaces!");
+    expectFileSystemCreateFailure(conf,
+        AWSBadRequestException.class,
+        "Member must satisfy regular expression pattern");
+  }
+
+
+  /**
+   * Create the assumed role configuration.
+   * @return a config bonded to the ARN of the assumed role
+   */
+  public Configuration createAssumedRoleConfig() {
+    return createAssumedRoleConfig(getAssumedRoleARN());
+  }
+
+  /**
+   * Create a config for an assumed role; it also disables FS caching.
+   * @param roleARN ARN of role
+   * @return the new configuration
+   */
+  private Configuration createAssumedRoleConfig(String roleARN) {
+    return newAssumedRoleConfig(getContract().getConf(), roleARN);
+  }
+
+  @Test
+  public void testAssumeRoleUndefined() throws Throwable {
+    describe("Verify that you cannot instantiate the"
+        + " AssumedRoleCredentialProvider without a role ARN");
+    Configuration conf = new Configuration();
+    conf.set(ASSUMED_ROLE_ARN, "");
+    interceptClosing(IOException.class,
+        AssumedRoleCredentialProvider.E_NO_ROLE,
+        () -> new AssumedRoleCredentialProvider(conf));
+  }
+
+  @Test
+  public void testAssumedIllegalDuration() throws Throwable {
+    describe("Expect the constructor to fail if the session is to short");
+    Configuration conf = new Configuration();
+    conf.set(ASSUMED_ROLE_SESSION_DURATION, "30s");
+    interceptClosing(IllegalArgumentException.class, "",
+        () -> new AssumedRoleCredentialProvider(conf));
+  }
+
+
+  @Test
+  public void testAssumeRoleCreateFS() throws IOException {
+    describe("Create an FS client with the role and do some basic IO");
+
+    String roleARN = getAssumedRoleARN();
+    Configuration conf = createAssumedRoleConfig(roleARN);
+    Path path = new Path(getFileSystem().getUri());
+    LOG.info("Creating test FS and user {} with assumed role {}",
+        conf.get(ACCESS_KEY), roleARN);
+
+    try (FileSystem fs = path.getFileSystem(conf)) {
+      fs.getFileStatus(new Path("/"));
+      fs.mkdirs(path("testAssumeRoleFS"));
+    }
+  }
+
+  @Test
+  public void testAssumeRoleRestrictedPolicyFS() throws Exception {
+    describe("Restrict the policy for this session; verify that reads fail");
+
+    Configuration conf = createAssumedRoleConfig();
+    bindRolePolicy(conf, RESTRICTED_POLICY);
+    Path path = new Path(getFileSystem().getUri());
+    try (FileSystem fs = path.getFileSystem(conf)) {
+      forbidden("getFileStatus",
+          () -> fs.getFileStatus(new Path("/")));
+      forbidden("getFileStatus",
+          () -> fs.listStatus(new Path("/")));
+      forbidden("getFileStatus",
+          () -> fs.mkdirs(path("testAssumeRoleFS")));
+    }
+  }
+
+  /**
+   * Tighten the extra policy on the assumed role call for torrent access,
+   * and verify that it blocks all other operations.
+   * That is: any non-empty policy in the assumeRole API call overrides
+   * all of the policies attached to the role beforehand, switching the
+   * role instance to only the policies supplied in the call.
+   */
+  @Test
+  public void testAssumeRolePoliciesOverrideRolePerms() throws Throwable {
+
+    describe("extra policies in assumed roles need;"
+        + " all required policies stated");
+    Configuration conf = createAssumedRoleConfig();
+
+    bindRolePolicy(conf,
+        policy(statement(false, S3_ALL_BUCKETS, S3_GET_OBJECT_TORRENT)));
+    Path path = path("testAssumeRoleStillIncludesRolePerms");
+    roleFS = (S3AFileSystem) path.getFileSystem(conf);
+    assertTouchForbidden(roleFS, path);
+  }
+
+  /**
+   * After blocking all write verbs used by S3A, try to write data (fail)
+   * and read data (succeed).
+   */
+  @Test
+  public void testReadOnlyOperations() throws Throwable {
+
+    describe("Restrict role to read only");
+    Configuration conf = createAssumedRoleConfig();
+
+    bindRolePolicy(conf,
+        policy(
+            statement(false, S3_ALL_BUCKETS, S3_PATH_WRITE_OPERATIONS),
+            STATEMENT_ALL_S3, STATEMENT_ALL_DDB));
+    Path path = methodPath();
+    roleFS = (S3AFileSystem) path.getFileSystem(conf);
+    // list the root path, expect happy
+    roleFS.listStatus(ROOT);
+
+    // touch will fail
+    assertTouchForbidden(roleFS, path);
+    // you can delete it, because it's not there and getFileStatus() is allowed
+    roleFS.delete(path, true);
+
+    //create it with the full FS
+    getFileSystem().mkdirs(path);
+
+    // and delete will not
+    assertDeleteForbidden(this.roleFS, path);
+
+    // list multipart uploads.
+    // This is part of the read policy.
+    int counter = 0;
+    MultipartUtils.UploadIterator iterator = roleFS.listUploads("/");
+    while (iterator.hasNext()) {
+      counter++;
+      iterator.next();
+    }
+    LOG.info("Found {} outstanding MPUs", counter);
+  }
+
+  /**
+   * Write successfully to the directory with full R/W access,
+   * fail to write or delete data elsewhere.
+   */
+  @SuppressWarnings("StringConcatenationMissingWhitespace")
+  @Test
+  public void testRestrictedWriteSubdir() throws Throwable {
+
+    describe("Attempt writing to paths where a role only has"
+        + " write access to a subdir of the bucket");
+    Path restrictedDir = methodPath();
+    Path child = new Path(restrictedDir, "child");
+    // the full FS
+    S3AFileSystem fs = getFileSystem();
+    fs.delete(restrictedDir, true);
+
+    Configuration conf = createAssumedRoleConfig();
+
+    bindRolePolicyStatements(conf,
+        STATEMENT_ALL_DDB,
+        statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
+        new Statement(Effects.Allow)
+          .addActions(S3_ALL_OPERATIONS)
+          .addResources(directory(restrictedDir)));
+    roleFS = (S3AFileSystem) restrictedDir.getFileSystem(conf);
+
+    roleFS.getFileStatus(ROOT);
+    roleFS.mkdirs(restrictedDir);
+    assertIsDirectory(restrictedDir);
+    // you can create an adjacent child
+    touch(roleFS, child);
+    assertIsFile(child);
+    // child delete rights
+    ContractTestUtils.assertDeleted(roleFS, child, true);
+    // parent delete rights
+    ContractTestUtils.assertDeleted(roleFS, restrictedDir, true);
+    // delete will try to create an empty parent directory marker, and may fail
+    roleFS.delete(restrictedDir, false);
+    // this sibling path has the same prefix as restrictedDir, but is
+    // adjacent. This verifies that a restrictedDir* pattern isn't matching
+    // siblings, so granting broader rights
+    Path sibling = new Path(restrictedDir.toUri() + "sibling");
+    touch(fs, sibling);
+    assertTouchForbidden(roleFS, sibling);
+    assertDeleteForbidden(roleFS, sibling);
+  }
+
+  public Path methodPath() throws IOException {
+    return path(getMethodName());
+  }
+
+  @Test
+  public void testRestrictedRename() throws Throwable {
+    describe("rename with parent paths not writeable");
+    executeRestrictedRename(createAssumedRoleConfig());
+  }
+
+  @Test
+  public void testRestrictedSingleDeleteRename() throws Throwable {
+    describe("rename with parent paths not writeable"
+        + " and multi-object delete disabled");
+    Configuration conf = createAssumedRoleConfig();
+    conf.setBoolean(ENABLE_MULTI_DELETE, false);
+    executeRestrictedRename(conf);
+  }
+
+  /**
+   * Execute a sequence of rename operations.
+   * @param conf FS configuration
+   */
+  public void executeRestrictedRename(final Configuration conf)
+      throws IOException {
+    Path basePath = methodPath();
+    Path restrictedDir = new Path(basePath, "renameSrc");
+    Path destPath = new Path(basePath, "renameDest");
+    Path child = new Path(restrictedDir, "child");
+    // the full FS
+    S3AFileSystem fs = getFileSystem();
+    fs.delete(basePath, true);
+
+    bindRolePolicyStatements(conf,
+        STATEMENT_ALL_DDB,
+        statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
+        new Statement(Effects.Allow)
+          .addActions(S3_PATH_RW_OPERATIONS)
+          .addResources(directory(restrictedDir))
+          .addResources(directory(destPath))
+    );
+    roleFS = (S3AFileSystem) restrictedDir.getFileSystem(conf);
+
+    roleFS.getFileStatus(ROOT);
+    roleFS.mkdirs(restrictedDir);
+    // you can create an adjacent child
+    touch(roleFS, child);
+
+    roleFS.delete(destPath, true);
+    // as dest doesn't exist, this will map child -> dest
+    assertRenameOutcome(roleFS, child, destPath, true);
+
+    assertIsFile(destPath);
+    assertIsDirectory(restrictedDir);
+    Path renamedDestPath = new Path(restrictedDir, destPath.getName());
+    assertRenameOutcome(roleFS, destPath, restrictedDir, true);
+    assertIsFile(renamedDestPath);
+    roleFS.delete(restrictedDir, true);
+    roleFS.delete(destPath, true);
+  }
+
+  @Test
+  public void testRestrictedRenameReadOnlyData() throws Throwable {
+    describe("rename with source read only, multidelete");
+    executeRenameReadOnlyData(createAssumedRoleConfig());
+  }
+
+  @Test
+  public void testRestrictedRenameReadOnlySingleDelete() throws Throwable {
+    describe("rename with source read only single delete");
+    Configuration conf = createAssumedRoleConfig();
+    conf.setBoolean(ENABLE_MULTI_DELETE, false);
+    executeRenameReadOnlyData(conf);
+  }
+
+  /**
+   * Execute a sequence of rename operations where the source
+   * data is read only to the client calling rename().
+   * This will cause the inner delete() operations to fail, whose outcomes
+   * are explored.
+   * Multiple files are created (in parallel) for some renames, so exploring
+   * the outcome on bulk delete calls, including verifying that a
+   * MultiObjectDeleteException is translated to an AccessDeniedException.
+   * <ol>
+   *   <li>The exception raised is AccessDeniedException,
+   *   from single and multi DELETE calls.</li>
+   *   <li>It happens after the COPY. Not ideal, but, well, we can't pretend
+   *   it's a filesystem forever.</li>
+   * </ol>
+   * @param conf FS configuration
+   */
+  public void executeRenameReadOnlyData(final Configuration conf)
+      throws Exception {
+    assume("Does not work with S3Guard", !getFileSystem().hasMetadataStore());
+    Path basePath = methodPath();
+    Path destDir = new Path(basePath, "renameDest");
+    Path readOnlyDir = new Path(basePath, "readonlyDir");
+    Path readOnlyFile = new Path(readOnlyDir, "readonlyChild");
+
+    // the full FS
+    S3AFileSystem fs = getFileSystem();
+    fs.delete(basePath, true);
+
+    // this file is readable by the roleFS, but cannot be deleted
+    touch(fs, readOnlyFile);
+
+    bindRolePolicyStatements(conf,
+        STATEMENT_ALL_DDB,
+        statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
+          new Statement(Effects.Allow)
+            .addActions(S3_PATH_RW_OPERATIONS)
+            .addResources(directory(destDir))
+    );
+    roleFS = (S3AFileSystem) destDir.getFileSystem(conf);
+
+    roleFS.delete(destDir, true);
+    roleFS.mkdirs(destDir);
+    // rename will fail in the delete phase
+    forbidden(readOnlyFile.toString(),
+        () -> roleFS.rename(readOnlyFile, destDir));
+
+    // and the source file is still there
+    assertIsFile(readOnlyFile);
+
+    // but so is the copied version, because there's no attempt
+    // at rollback, or preflight checking on the delete permissions
+    Path renamedFile = new Path(destDir, readOnlyFile.getName());
+
+    assertIsFile(renamedFile);
+
+    ContractTestUtils.assertDeleted(roleFS, renamedFile, true);
+    assertFileCount("Empty Dest Dir", roleFS,
+        destDir, 0);
+    // create a set of files
+    // this is done in parallel as it is 10x faster on a long-haul test run.
+    int range = 10;
+    touchFiles(fs, readOnlyDir, range);
+    // don't forget about that original file!
+    final long createdFiles = range + 1;
+    // are they all there?
+    assertFileCount("files ready to rename", roleFS,
+        readOnlyDir, createdFiles);
+
+    // try to rename the directory
+    LOG.info("Renaming readonly files {} to {}", readOnlyDir, destDir);
+    AccessDeniedException ex = forbidden("",
+        () -> roleFS.rename(readOnlyDir, destDir));
+    LOG.info("Result of renaming read-only files is AccessDeniedException", ex);
+    assertFileCount("files copied to the destination", roleFS,
+        destDir, createdFiles);
+    assertFileCount("files in the source directory", roleFS,
+        readOnlyDir, createdFiles);
+
+    // and finally (so as to avoid the delay of POSTing some more objects)
+    // delete that r/o source
+    forbidden("", () -> roleFS.delete(readOnlyDir, true));
+  }
+
+  /**
+   * Parallel-touch a set of files in the destination directory.
+   * @param fs filesystem
+   * @param destDir destination
+   * @param range range 1..range inclusive of files to create.
+   */
+  public void touchFiles(final S3AFileSystem fs,
+      final Path destDir,
+      final int range) {
+    IntStream.rangeClosed(1, range).parallel().forEach(
+        (i) -> eval(() -> touch(fs, new Path(destDir, "file-" + i))));
+  }
+
+  @Test
+  public void testRestrictedCommitActions() throws Throwable {
+    describe("Attempt commit operations against a path with restricted rights");
+    Configuration conf = createAssumedRoleConfig();
+    conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true);
+    final int uploadPartSize = 5 * 1024 * 1024;
+
+    Path basePath = methodPath();
+    Path readOnlyDir = new Path(basePath, "readOnlyDir");
+    Path writeableDir = new Path(basePath, "writeableDir");
+    // the full FS
+    S3AFileSystem fs = getFileSystem();
+    fs.delete(basePath, true);
+    fs.mkdirs(readOnlyDir);
+
+    bindRolePolicyStatements(conf,
+        STATEMENT_ALL_DDB,
+        statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
+        new Statement(Effects.Allow)
+            .addActions(S3_PATH_RW_OPERATIONS)
+            .addResources(directory(writeableDir))
+    );
+    roleFS = (S3AFileSystem) writeableDir.getFileSystem(conf);
+    CommitOperations fullOperations = new CommitOperations(fs);
+    CommitOperations operations = new CommitOperations(roleFS);
+
+    File localSrc = File.createTempFile("source", "");
+    writeCSVData(localSrc);
+    Path uploadDest = new Path(readOnlyDir, "restricted.csv");
+
+    forbidden("initiate MultiPartUpload",
+        () -> {
+          return operations.uploadFileToPendingCommit(localSrc,
+              uploadDest, "", uploadPartSize);
+        });
+    // delete the file
+    localSrc.delete();
+    // create a directory there
+    localSrc.mkdirs();
+
+    // create some local files and upload them with permissions
+
+    int range = 2;
+    IntStream.rangeClosed(1, range)
+        .parallel()
+        .forEach((i) -> eval(() -> {
+          String name = "part-000" + i;
+          File src = new File(localSrc, name);
+          Path dest = new Path(readOnlyDir, name);
+          writeCSVData(src);
+          SinglePendingCommit pending =
+              fullOperations.uploadFileToPendingCommit(src, dest, "",
+                  uploadPartSize);
+          pending.save(fs, new Path(readOnlyDir,
+              name + CommitConstants.PENDING_SUFFIX), true);
+          assertTrue(src.delete());
+        }));
+
+    try {
+      // we expect to be able to list all the files here
+      Pair<PendingSet, List<Pair<LocatedFileStatus, IOException>>>
+          pendingCommits = operations.loadSinglePendingCommits(readOnlyDir,
+          true);
+
+      // all those commits must fail
+      List<SinglePendingCommit> commits = pendingCommits.getLeft().getCommits();
+      assertEquals(range, commits.size());
+      commits.parallelStream().forEach(
+          (c) -> {
+            CommitOperations.MaybeIOE maybeIOE = operations.commit(c, "origin");
+            Path path = c.destinationPath();
+            assertCommitAccessDenied(path, maybeIOE);
+          });
+
+      // the list-and-abort of all the .pending files must also fail.
+      LOG.info("abortAllSinglePendingCommits({})", readOnlyDir);
+      assertCommitAccessDenied(readOnlyDir,
+          operations.abortAllSinglePendingCommits(readOnlyDir, true));
+
+      // try writing a magic file
+      Path magicDestPath = new Path(readOnlyDir,
+          CommitConstants.MAGIC + "/" + "magic.txt");
+      forbidden("", () -> {
+        touch(roleFS, magicDestPath);
+        // shouldn't get here; if we do: return the existence of the 0-byte
+        // dest file.
+        return fs.getFileStatus(magicDestPath);
+      });
+
+      // a recursive list and abort is blocked.
+      forbidden("",
+          () -> operations.abortPendingUploadsUnderPath(readOnlyDir));
+    } finally {
+      LOG.info("Cleanup");
+      fullOperations.abortPendingUploadsUnderPath(readOnlyDir);
+    }
+  }
+
+  /**
+   * Verifies that an operation returning a "MaybeIOE" failed
+   * with an AccessDeniedException in the maybe instance.
+   * @param path path operated on
+   * @param maybeIOE result to inspect
+   */
+  public void assertCommitAccessDenied(final Path path,
+      final CommitOperations.MaybeIOE maybeIOE) {
+    IOException ex = maybeIOE.getException();
+    assertNotNull("no IOE in " + maybeIOE + " for " + path, ex);
+    if (!(ex instanceof AccessDeniedException)) {
+      ContractTestUtils.fail("Wrong exception class for commit to "
+          + path, ex);
+    }
+  }
+
+  /**
+   * Write some CSV data to a local file.
+   * @param localSrc local file
+   * @throws IOException failure
+   */
+  public void writeCSVData(final File localSrc) throws IOException {
+    try(FileOutputStream fo = new FileOutputStream(localSrc)) {
+      fo.write("1, true".getBytes());
+    }
+  }
+
+  @Test
+  public void testPartialDelete() throws Throwable {
+    describe("delete with part of the child tree read only; multidelete");
+    executePartialDelete(createAssumedRoleConfig());
+  }
+
+  @Test
+  public void testPartialDeleteSingleDelete() throws Throwable {
+    describe("delete with part of the child tree read only");
+    Configuration conf = createAssumedRoleConfig();
+    conf.setBoolean(ENABLE_MULTI_DELETE, false);
+    executePartialDelete(conf);
+  }
+
+  /**
+   * Have a directory with full R/W permissions, but then remove
+   * write access underneath, and try to delete it.
+   * @param conf FS configuration
+   */
+  public void executePartialDelete(final Configuration conf)
+      throws Exception {
+    Path destDir = methodPath();
+    Path readOnlyDir = new Path(destDir, "readonlyDir");
+
+    // the full FS
+    S3AFileSystem fs = getFileSystem();
+    fs.delete(destDir, true);
+
+    bindRolePolicyStatements(conf,
+        STATEMENT_ALL_DDB,
+        statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS),
+        new Statement(Effects.Deny)
+            .addActions(S3_PATH_WRITE_OPERATIONS)
+            .addResources(directory(readOnlyDir))
+    );
+    roleFS = (S3AFileSystem) destDir.getFileSystem(conf);
+
+    int range = 10;
+    touchFiles(fs, readOnlyDir, range);
+    touchFiles(roleFS, destDir, range);
+    forbidden("", () -> roleFS.delete(readOnlyDir, true));
+    forbidden("", () -> roleFS.delete(destDir, true));
+
+    // and although you can't delete under the path, if the file doesn't
+    // exist, the delete call fails fast.
+    Path pathWhichDoesntExist = new Path(readOnlyDir, "no-such-path");
+    assertFalse("deleting " + pathWhichDoesntExist,
+        roleFS.delete(pathWhichDoesntExist, true));
+  }
+
+  /**
+   * Assert that the number of files in a destination matches that expected.
+   * @param text text to use in the message
+   * @param fs filesystem
+   * @param path path to list (recursively)
+   * @param expected expected count
+   * @throws IOException IO problem
+   */
+  private static void assertFileCount(String text, FileSystem fs,
+      Path path, long expected)
+      throws IOException {
+    List<String> files = new ArrayList<>();
+    applyLocatedFiles(fs.listFiles(path, true),
+        (status) -> files.add(status.getPath().toString()));
+    long actual = files.size();
+    if (actual != expected) {
+      String ls = files.stream().collect(Collectors.joining("\n"));
+      fail(text + ": expected " + expected + " files in " + path
+          + " but got " + actual + "\n" + ls);
+    }
+  }
+}
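
Taken together, the tests above cover the configuration surface of the assumed-role provider. A minimal application-side sketch mirroring testCreateCredentialProvider() and testAssumeRoleCreateFS(); the role ARN and bucket below are placeholders, not values from the patch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider;
    import static org.apache.hadoop.fs.s3a.Constants.*;

    // Sketch only: bind an S3A client to an assumed role.
    Configuration conf = new Configuration();
    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
    conf.set(ASSUMED_ROLE_ARN, "arn:aws:iam::123456789012:role/example-role");
    conf.set(ASSUMED_ROLE_SESSION_NAME, "valid");
    conf.set(ASSUMED_ROLE_SESSION_DURATION, "45m");
    try (FileSystem fs = new Path("s3a://example-bucket/").getFileSystem(conf)) {
      fs.getFileStatus(new Path("/")); // basic IO with the assumed-role credentials
    }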

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java
new file mode 100644
index 0000000..bb66268
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.io.IOException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AUtils;
+import org.apache.hadoop.fs.s3a.commit.ITestCommitOperations;
+
+import static org.apache.hadoop.fs.s3a.Constants.ASSUMED_ROLE_ARN;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
+import static org.apache.hadoop.fs.s3a.auth.RoleModel.*;
+import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*;
+import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.*;
+
+/**
+ * Verify that the commit operations work with a restricted set of operations.
+ * The superclass, {@link ITestCommitOperations}, turns on an inconsistent client
+ * to see how things work in the presence of inconsistency.
+ * These tests disable it, to remove inconsistency as a factor, and verify
+ * that the policy settings needed to enable MPU list/commit/abort are all
+ * in place.
+ */
+public class ITestAssumedRoleCommitOperations extends ITestCommitOperations {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ITestAssumedRoleCommitOperations.class);
+
+  /**
+   * The restricted directory.
+   */
+  private Path restrictedDir;
+
+  /**
+   * A role FS; if non-null it is closed in teardown.
+   */
+  private S3AFileSystem roleFS;
+
+  @Override
+  public boolean useInconsistentClient() {
+    return false;
+  }
+
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    assumeRoleTests();
+
+    restrictedDir = super.path("restricted");
+    Configuration conf = newAssumedRoleConfig(getConfiguration(),
+        getAssumedRoleARN());
+    bindRolePolicyStatements(conf,
+        STATEMENT_ALL_DDB,
+        statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
+        new RoleModel.Statement(RoleModel.Effects.Allow)
+            .addActions(S3_PATH_RW_OPERATIONS)
+            .addResources(directory(restrictedDir))
+    );
+    roleFS = (S3AFileSystem) restrictedDir.getFileSystem(conf);
+  }
+
+
+  @Override
+  public void teardown() throws Exception {
+    S3AUtils.closeAll(LOG, roleFS);
+    // switches getFileSystem() back to the full FS.
+    roleFS = null;
+    super.teardown();
+  }
+
+  private void assumeRoleTests() {
+    assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
+  }
+
+  /**
+   * The overridden operation returns the roleFS, so that test cases
+   * in the superclass run under restricted rights.
+   * There's special handling in startup to avoid NPEs.
+   * @return {@link #roleFS}
+   */
+  @Override
+  public S3AFileSystem getFileSystem() {
+    return roleFS != null ? roleFS : getFullFileSystem();
+  }
+
+  /**
+   * Get the FS with full access rights.
+   * @return the FS created by the superclass.
+   */
+  public S3AFileSystem getFullFileSystem() {
+    return super.getFileSystem();
+  }
+
+  /**
+   * Map the requested path under the restricted directory.
+   * {@inheritDoc}
+   */
+  @Override
+  protected Path path(String filepath) throws IOException {
+    return new Path(restrictedDir, filepath);
+  }
+
+
+  private String getAssumedRoleARN() {
+    return getContract().getConf().getTrimmed(ASSUMED_ROLE_ARN, "");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java
new file mode 100644
index 0000000..9fa2600
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.nio.file.AccessDeniedException;
+import java.util.concurrent.Callable;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.auth.RoleModel.*;
+import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Helper class for testing roles.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public final class RoleTestUtils {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(RoleTestUtils.class);
+
+  private static final RoleModel MODEL = new RoleModel();
+
+
+  /** Example ARN of a role. */
+  public static final String ROLE_ARN_EXAMPLE
+      = "arn:aws:iam::9878543210123:role/role-s3-restricted";
+
+
+  /** Deny GET requests to all buckets. */
+  public static final Statement DENY_GET_ALL =
+      statement(false, S3_ALL_BUCKETS, S3_GET_OBJECT);
+
+  /**
+   * This AWS policy removes read access.
+   */
+  public static final Policy RESTRICTED_POLICY = policy(DENY_GET_ALL);
+
+
+  /**
+   * Error message to get from the AWS SDK if you can't assume the role.
+   */
+  public static final String E_BAD_ROLE
+      = "Not authorized to perform sts:AssumeRole";
+
+  private RoleTestUtils() {
+  }
+
+  /**
+   * Bind the configuration's {@code ASSUMED_ROLE_POLICY} option to
+   * the given policy.
+   * @param conf configuration to patch
+   * @param policy policy to apply
+   * @return the modified configuration
+   * @throws JsonProcessingException JSON marshalling error
+   */
+  public static Configuration bindRolePolicy(final Configuration conf,
+      final Policy policy) throws JsonProcessingException {
+    String p = MODEL.toJson(policy);
+    LOG.info("Setting role policy to policy of size {}:\n{}", p.length(), p);
+    conf.set(ASSUMED_ROLE_POLICY, p);
+    return conf;
+  }
+
+  /**
+   * Wrap a set of statements with a policy and bind the configuration's
+   * {@code ASSUMED_ROLE_POLICY} option to it.
+   * @param conf configuration to patch
+   * @param statements statements to aggregate
+   * @return the modified configuration
+   * @throws JsonProcessingException JSON marshalling error
+   */
+  public static Configuration bindRolePolicyStatements(
+      final Configuration conf,
+      final Statement... statements) throws JsonProcessingException {
+    return bindRolePolicy(conf, policy(statements));
+  }
+
+
+  /**
+   * Try to delete a file, verify that it is not allowed.
+   * @param fs filesystem
+   * @param path path
+   */
+  public static void assertDeleteForbidden(final FileSystem fs, final Path path)
+      throws Exception {
+    intercept(AccessDeniedException.class, "",
+        () -> fs.delete(path, true));
+  }
+
+  /**
+   * Try to touch a file, verify that it is not allowed.
+   * @param fs filesystem
+   * @param path path
+   */
+  public static void assertTouchForbidden(final FileSystem fs, final Path path)
+      throws Exception {
+    intercept(AccessDeniedException.class, "",
+        "Caller could create file at " + path,
+        () -> {
+          touch(fs, path);
+          return fs.getFileStatus(path);
+        });
+  }
+
+  /**
+   * Create a config for an assumed role; it also disables FS caching.
+   * @param srcConf source config: this is not modified
+   * @param roleARN ARN of role
+   * @return the new configuration
+   */
+  public static Configuration newAssumedRoleConfig(
+      final Configuration srcConf,
+      final String roleARN) {
+    Configuration conf = new Configuration(srcConf);
+    conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME);
+    conf.set(ASSUMED_ROLE_ARN, roleARN);
+    conf.set(ASSUMED_ROLE_SESSION_NAME, "valid");
+    conf.set(ASSUMED_ROLE_SESSION_DURATION, "15m");
+    disableFilesystemCaching(conf);
+    return conf;
+  }
+
+  /**
+   * Assert that an operation is forbidden.
+   * @param contained contained text, may be null
+   * @param eval closure to evaluate
+   * @param <T> type of closure
+   * @return the access denied exception
+   * @throws Exception any other exception
+   */
+  public static <T> AccessDeniedException forbidden(
+      String contained,
+      Callable<T> eval)
+      throws Exception {
+    AccessDeniedException ex = intercept(AccessDeniedException.class, eval);
+    GenericTestUtils.assertExceptionContains(contained, ex);
+    return ex;
+  }
+
+}
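
Taken together, newAssumedRoleConfig() and bindRolePolicyStatements() are what the assumed-role tests above use to stand up a second, restricted filesystem alongside the full one. The following is a minimal sketch of that pattern, not part of the patch: it assumes the static imports from RoleTestUtils/RolePolicies shown above, a configured fs.s3a.assumed.role.arn, and illustrative path names.

  // Sketch: build a filesystem whose credentials come from an assumed role
  // that only has read/write rights under one directory.
  Path restrictedDir = path("restricted");               // illustrative path
  Configuration conf = newAssumedRoleConfig(getConfiguration(),
      getAssumedRoleARN());
  bindRolePolicyStatements(conf,
      STATEMENT_ALL_DDB,
      statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
      new RoleModel.Statement(RoleModel.Effects.Allow)
          .addActions(S3_PATH_RW_OPERATIONS)
          .addResources(directory(restrictedDir)));
  S3AFileSystem roleFS = (S3AFileSystem) restrictedDir.getFileSystem(conf);
  // Writes inside restrictedDir should succeed; a write outside it should
  // surface as an AccessDeniedException, which the helper asserts on.
  assertTouchForbidden(roleFS, new Path("/outside-restricted/file.txt"));

As in the tests above, such a filesystem should be closed explicitly in teardown (S3AUtils.closeAll), since filesystem caching is disabled for it.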

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java
index 04676db..4730a90 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java
@@ -208,7 +208,9 @@ public abstract class AbstractCommitITest extends AbstractS3ATestBase {
    * @param p probability of a throttling occurring: 0-1.0
    */
   protected void setThrottling(float p) {
-    inconsistentClient.setThrottleProbability(p);
+    if (inconsistentClient != null) {
+      inconsistentClient.setThrottleProbability(p);
+    }
   }
 
   /**
@@ -217,7 +219,9 @@ public abstract class AbstractCommitITest extends AbstractS3ATestBase {
    * @param limit limit to number of calls which fail
    */
   protected void setThrottling(float p, int limit) {
-    inconsistentClient.setThrottleProbability(p);
+    if (inconsistentClient != null) {
+      inconsistentClient.setThrottleProbability(p);
+    }
     setFailureLimit(limit);
   }
 
@@ -235,7 +239,9 @@ public abstract class AbstractCommitITest extends AbstractS3ATestBase {
    * @param limit limit to number of calls which fail
    */
   private void setFailureLimit(int limit) {
-    inconsistentClient.setFailureLimit(limit);
+    if (inconsistentClient != null) {
+      inconsistentClient.setFailureLimit(limit);
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9a013b25/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestCommitOperations.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestCommitOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestCommitOperations.java
index 2a98382..2886a99 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestCommitOperations.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestCommitOperations.java
@@ -528,7 +528,9 @@ public class ITestCommitOperations extends AbstractCommitITest {
   @Test
   public void testWriteNormalStream() throws Throwable {
     S3AFileSystem fs = getFileSystem();
-    Assume.assumeTrue(fs.hasCapability(STREAM_CAPABILITY_MAGIC_OUTPUT));
+    Assume.assumeTrue(
+        "Filesystem does not have magic support enabled: " + fs,
+        fs.hasCapability(STORE_CAPABILITY_MAGIC_COMMITTER));
 
     Path destFile = path("normal");
     try (FSDataOutputStream out = fs.create(destFile, true)) {




[21/21] hadoop git commit: HADOOP-15206. BZip2 drops and duplicates records when input split size is small. Contributed by Aki Tanaka

Posted by ha...@apache.org.
HADOOP-15206. BZip2 drops and duplicates records when input split size is small. Contributed by Aki Tanaka


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0898ff42
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0898ff42
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0898ff42

Branch: refs/heads/HDFS-12996
Commit: 0898ff42e9e5c53f2fce7ccdeb4e1cd7d0f123b3
Parents: 4c2119f
Author: Jason Lowe <jl...@apache.org>
Authored: Fri Feb 16 14:49:00 2018 -0600
Committer: Jason Lowe <jl...@apache.org>
Committed: Fri Feb 16 14:49:00 2018 -0600

----------------------------------------------------------------------
 .../apache/hadoop/io/compress/BZip2Codec.java   | 30 +++++++++++++++++++-
 .../hadoop/mapred/TestTextInputFormat.java      |  8 ++++++
 2 files changed, 37 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0898ff42/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
index db78118..3c78cfc 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
@@ -362,9 +362,29 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec {
       bufferedIn = new BufferedInputStream(super.in);
       this.startingPos = super.getPos();
       this.readMode = readMode;
+      long numSkipped = 0;
       if (this.startingPos == 0) {
         // We only strip header if it is start of file
         bufferedIn = readStreamHeader();
+      } else if (this.readMode == READ_MODE.BYBLOCK  &&
+          this.startingPos <= HEADER_LEN + SUB_HEADER_LEN) {
+        // When we're in BYBLOCK mode and the start position is > 0
+        // and <= HEADER_LEN + SUB_HEADER_LEN, we should skip to just after
+        // the start of the first bz2 block to avoid duplicated records
+        numSkipped = HEADER_LEN + SUB_HEADER_LEN + 1 - this.startingPos;
+        long skipBytes = numSkipped;
+        while (skipBytes > 0) {
+          long s = bufferedIn.skip(skipBytes);
+          if (s > 0) {
+            skipBytes -= s;
+          } else {
+            if (bufferedIn.read() == -1) {
+              break; // end of the split
+            } else {
+              skipBytes--;
+            }
+          }
+        }
       }
       input = new CBZip2InputStream(bufferedIn, readMode);
       if (this.isHeaderStripped) {
@@ -375,7 +395,15 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec {
         input.updateReportedByteCount(SUB_HEADER_LEN);
       }
 
-      this.updatePos(false);
+      if (numSkipped > 0) {
+        input.updateReportedByteCount((int) numSkipped);
+      }
+
+      // To avoid dropped records, do not advertise a new byte position
+      // when we are in BYBLOCK mode and the start position is 0
+      if (!(this.readMode == READ_MODE.BYBLOCK && this.startingPos == 0)) {
+        this.updatePos(false);
+      }
     }
 
     private BufferedInputStream readStreamHeader() throws IOException {
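
The skip loop added above has to work around the InputStream.skip() contract: skip() may skip fewer bytes than requested, or none at all, without the stream being at its end. The same idea in isolation, as a self-contained sketch (SkipUtil and skipFully are names invented for illustration):

  import java.io.IOException;
  import java.io.InputStream;

  final class SkipUtil {
    private SkipUtil() {
    }

    /**
     * Skip exactly {@code n} bytes, falling back to single-byte reads when
     * skip() makes no progress; gives up only at end of stream.
     */
    static void skipFully(InputStream in, long n) throws IOException {
      long remaining = n;
      while (remaining > 0) {
        long skipped = in.skip(remaining);
        if (skipped > 0) {
          remaining -= skipped;
        } else if (in.read() == -1) {
          break;              // end of stream reached before n bytes
        } else {
          remaining--;        // one byte consumed by read()
        }
      }
    }
  }

BZip2Codec keeps the loop inline rather than calling a helper, and additionally feeds the number of bytes skipped back into the reported byte count, as shown in the hunk above.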

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0898ff42/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java
index 0ea1d6d..22d9a57 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java
@@ -183,6 +183,14 @@ public class TestTextInputFormat {
     // corner case when we have byte alignment and position of stream are same
     verifyPartitions(471507, 218, file, codec, conf);
     verifyPartitions(473608, 110, file, codec, conf);
+
+    // corner case when split size is small and position of stream is before
+    // the first BZip2 block
+    verifyPartitions(100, 20, file, codec, conf);
+    verifyPartitions(100, 25, file, codec, conf);
+    verifyPartitions(100, 30, file, codec, conf);
+    verifyPartitions(100, 50, file, codec, conf);
+    verifyPartitions(100, 100, file, codec, conf);
   }
 
   // Test a corner case when position of stream is right after BZip2 marker




[13/21] hadoop git commit: YARN-7677. Docker image cannot set HADOOP_CONF_DIR. Contributed by Jim Brennan

Posted by ha...@apache.org.
YARN-7677. Docker image cannot set HADOOP_CONF_DIR. Contributed by Jim Brennan


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8013475d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8013475d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8013475d

Branch: refs/heads/HDFS-12996
Commit: 8013475d447a8377b5aed858208bf8b91dd32366
Parents: 0b489e5
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Feb 15 17:09:00 2018 -0600
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Feb 15 17:09:00 2018 -0600

----------------------------------------------------------------------
 .../java/org/apache/hadoop/yarn/util/Apps.java  |  22 ++-
 .../yarn/util/AuxiliaryServiceHelper.java       |   2 +-
 .../server/nodemanager/ContainerExecutor.java   |  62 ++++++---
 .../nodemanager/LinuxContainerExecutor.java     |   8 --
 .../launcher/ContainerLaunch.java               |  88 ++++++++----
 .../runtime/DefaultLinuxContainerRuntime.java   |   6 -
 .../DelegatingLinuxContainerRuntime.java        |  11 --
 .../runtime/DockerLinuxContainerRuntime.java    |   7 -
 .../runtime/ContainerRuntime.java               |  11 --
 .../launcher/TestContainerLaunch.java           | 133 +++++++++++++++++--
 10 files changed, 240 insertions(+), 110 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
index 685c6d3..1c90d55 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
@@ -23,6 +23,7 @@ import static org.apache.hadoop.yarn.util.StringHelper.join;
 import static org.apache.hadoop.yarn.util.StringHelper.sjoin;
 
 import java.io.File;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.regex.Matcher;
@@ -105,7 +106,26 @@ public class Apps {
       }
     }
   }
-  
+
+  /**
+   * Return the names of the environment variables defined in envString.
+   * @param envString String containing env variable definitions
+   * @param classPathSeparator String that separates the definitions
+   * @return ArrayList of environment variable names
+   */
+  public static ArrayList<String> getEnvVarsFromInputString(String envString,
+      String classPathSeparator) {
+    ArrayList<String> envList = new ArrayList<>();
+    if (envString != null && envString.length() > 0) {
+      Matcher varValMatcher = VARVAL_SPLITTER.matcher(envString);
+      while (varValMatcher.find()) {
+        String envVar = varValMatcher.group(1);
+        envList.add(envVar);
+      }
+    }
+    return envList;
+  }
+
   /**
    * This older version of this method is kept around for compatibility
    * because downstream frameworks like Spark and Tez have been using it.
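
The new getEnvVarsFromInputString() walks the same VARVAL_SPLITTER matcher that setEnvFromInputString() uses, but keeps only the variable names. A simplified, self-contained sketch of that extract-names-with-a-Matcher pattern (the regex below is a stand-in invented for illustration, not the real VARVAL_SPLITTER):

  import java.util.ArrayList;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;

  public class EnvNameExtractor {
    // Simplified pattern for "NAME=value" pairs separated by commas.
    private static final Pattern VAR_VAL =
        Pattern.compile("([A-Za-z_][A-Za-z0-9_]*)=([^,]*)");

    public static void main(String[] args) {
      ArrayList<String> names = new ArrayList<>();
      Matcher m = VAR_VAL.matcher("JVM_PID=$$,MALLOC_ARENA_MAX=4");
      while (m.find()) {
        names.add(m.group(1));   // keep only the variable name
      }
      System.out.println(names); // [JVM_PID, MALLOC_ARENA_MAX]
    }
  }

In ContainerLaunch this helper is used to record which admin-env variable names were forced in, so they can be grouped with the other NodeManager-set variables when the launch script is written.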

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java
index cb118f5..1374d96 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java
@@ -45,7 +45,7 @@ public class AuxiliaryServiceHelper {
         Base64.encodeBase64String(byteData));
   }
 
-  private static String getPrefixServiceName(String serviceName) {
+  public static String getPrefixServiceName(String serviceName) {
     return NM_AUX_SERVICE + serviceName;
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index f4279a3..01cd992 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -27,6 +27,7 @@ import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -316,14 +317,15 @@ public abstract class ContainerExecutor implements Configurable {
    * @param command the command that will be run
    * @param logDir the log dir to which to copy debugging information
    * @param user the username of the job owner
+   * @param nmVars the set of environment vars that are explicitly set by NM
    * @throws IOException if any errors happened writing to the OutputStream,
    * while creating symlinks
    */
   public void writeLaunchEnv(OutputStream out, Map<String, String> environment,
       Map<Path, List<String>> resources, List<String> command, Path logDir,
-      String user) throws IOException {
+      String user, LinkedHashSet<String> nmVars) throws IOException {
     this.writeLaunchEnv(out, environment, resources, command, logDir, user,
-        ContainerLaunch.CONTAINER_SCRIPT);
+        ContainerLaunch.CONTAINER_SCRIPT, nmVars);
   }
 
   /**
@@ -339,14 +341,15 @@ public abstract class ContainerExecutor implements Configurable {
    * @param logDir the log dir to which to copy debugging information
    * @param user the username of the job owner
    * @param outFilename the path to which to write the launch environment
+   * @param nmVars the set of environment vars that are explicitly set by NM
    * @throws IOException if any errors happened writing to the OutputStream,
    * while creating symlinks
    */
   @VisibleForTesting
   public void writeLaunchEnv(OutputStream out, Map<String, String> environment,
       Map<Path, List<String>> resources, List<String> command, Path logDir,
-      String user, String outFilename) throws IOException {
-    updateEnvForWhitelistVars(environment);
+      String user, String outFilename, LinkedHashSet<String> nmVars)
+      throws IOException {
 
     ContainerLaunch.ShellScriptBuilder sb =
         ContainerLaunch.ShellScriptBuilder.create();
@@ -361,8 +364,40 @@ public abstract class ContainerExecutor implements Configurable {
 
     if (environment != null) {
       sb.echo("Setting up env variables");
+      // Whitelist environment variables are treated specially.
+      // Only add them if they are not already defined in the environment.
+      // Add them using special syntax to prevent them from eclipsing
+      // variables that may be set explicitly in the container image (e.g.,
+      // in a docker image).  Put these before the others to ensure the
+      // correct expansion is used.
+      for(String var : whitelistVars) {
+        if (!environment.containsKey(var)) {
+          String val = getNMEnvVar(var);
+          if (val != null) {
+            sb.whitelistedEnv(var, val);
+          }
+        }
+      }
+      // Now write vars that were set explicitly by nodemanager, preserving
+      // the order they were written in.
+      for (String nmEnvVar : nmVars) {
+        sb.env(nmEnvVar, environment.get(nmEnvVar));
+      }
+      // Now write the remaining environment variables.
       for (Map.Entry<String, String> env : environment.entrySet()) {
-        sb.env(env.getKey(), env.getValue());
+        if (!nmVars.contains(env.getKey())) {
+          sb.env(env.getKey(), env.getValue());
+        }
+      }
+      // Add the whitelist vars to the environment.  Do this after writing
+      // environment variables so they are not written twice.
+      for(String var : whitelistVars) {
+        if (!environment.containsKey(var)) {
+          String val = getNMEnvVar(var);
+          if (val != null) {
+            environment.put(var, val);
+          }
+        }
       }
     }
 
@@ -663,23 +698,6 @@ public abstract class ContainerExecutor implements Configurable {
     }
   }
 
-  /**
-   * Propagate variables from the nodemanager's environment into the
-   * container's environment if unspecified by the container.
-   * @param env the environment to update
-   * @see org.apache.hadoop.yarn.conf.YarnConfiguration#NM_ENV_WHITELIST
-   */
-  protected void updateEnvForWhitelistVars(Map<String, String> env) {
-    for(String var : whitelistVars) {
-      if (!env.containsKey(var)) {
-        String val = getNMEnvVar(var);
-        if (val != null) {
-          env.put(var, val);
-        }
-      }
-    }
-  }
-
   @VisibleForTesting
   protected String getNMEnvVar(String varname) {
     return System.getenv(varname);
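
The net effect of the rewritten writeLaunchEnv() is a fixed ordering in the generated launch script: whitelisted variables first (written so that a value already present in the container image wins), then the variables the NodeManager set explicitly, in insertion order, and finally the remaining user-supplied variables. A compressed sketch of that ordering, separate from the real ShellScriptBuilder machinery (the class and method names below are invented for illustration):

  import java.util.LinkedHashSet;
  import java.util.Map;
  import java.util.Set;

  final class LaunchEnvOrderSketch {
    private LaunchEnvOrderSketch() {
    }

    static String buildEnvSection(Map<String, String> env,
        Set<String> whitelist, LinkedHashSet<String> nmVars,
        Map<String, String> nmEnv) {
      StringBuilder sb = new StringBuilder();
      // 1. Whitelisted vars the container did not set itself, emitted as
      //    export VAR=${VAR:-"nm value"} so an image-supplied value wins.
      for (String var : whitelist) {
        if (!env.containsKey(var) && nmEnv.get(var) != null) {
          sb.append("export ").append(var).append("=${").append(var)
              .append(":-\"").append(nmEnv.get(var)).append("\"}\n");
        }
      }
      // 2. Vars set explicitly by the NodeManager, in insertion order.
      for (String var : nmVars) {
        sb.append("export ").append(var)
            .append("=\"").append(env.get(var)).append("\"\n");
      }
      // 3. Everything else the user supplied.
      for (Map.Entry<String, String> e : env.entrySet()) {
        if (!nmVars.contains(e.getKey())) {
          sb.append("export ").append(e.getKey())
              .append("=\"").append(e.getValue()).append("\"\n");
        }
      }
      return sb.toString();
    }
  }

The real code additionally puts the whitelisted values into the environment map after the script has been written, so later bookkeeping sees them without their being written twice.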

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index fe54e2c..44edc21 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -66,7 +66,6 @@ import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 import java.util.regex.Pattern;
 
 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*;
@@ -473,13 +472,6 @@ public class LinuxContainerExecutor extends ContainerExecutor {
   }
 
   @Override
-  protected void updateEnvForWhitelistVars(Map<String, String> env) {
-    if (linuxContainerRuntime.useWhitelistEnv(env)) {
-      super.updateEnvForWhitelistVars(env);
-    }
-  }
-
-  @Override
   public int launchContainer(ContainerStartContext ctx)
       throws IOException, ConfigurationException {
     Container container = ctx.getContainer();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index 112f54a..ca62a5c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -33,7 +33,9 @@ import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
+import java.util.LinkedHashSet;
 import java.util.Map;
+import java.util.Set;
 import java.util.Map.Entry;
 import java.util.concurrent.Callable;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -217,6 +219,9 @@ public class ContainerLaunch implements Callable<Integer> {
           launchContext, containerLogDir);
       // /////////////////////////// End of variable expansion
 
+      // Use this to track variables that are added to the environment by nm.
+      LinkedHashSet<String> nmEnvVars = new LinkedHashSet<String>();
+
       FileContext lfs = FileContext.getLocalFSFileContext();
 
       Path nmPrivateContainerScriptPath = dirsHandler.getLocalPathForWrite(
@@ -261,7 +266,7 @@ public class ContainerLaunch implements Callable<Integer> {
       }
 
       // Set the token location too.
-      environment.put(
+      addToEnvMap(environment, nmEnvVars,
           ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME,
           new Path(containerWorkDir,
               FINAL_CONTAINER_TOKENS_FILE).toUri().getPath());
@@ -272,14 +277,15 @@ public class ContainerLaunch implements Callable<Integer> {
                    EnumSet.of(CREATE, OVERWRITE))) {
         // Sanitize the container's environment
         sanitizeEnv(environment, containerWorkDir, appDirs, userLocalDirs,
-            containerLogDirs, localResources, nmPrivateClasspathJarDir);
+            containerLogDirs, localResources, nmPrivateClasspathJarDir,
+            nmEnvVars);
 
         prepareContainer(localResources, containerLocalDirs);
 
         // Write out the environment
         exec.writeLaunchEnv(containerScriptOutStream, environment,
             localResources, launchContext.getCommands(),
-            containerLogDir, user);
+            containerLogDir, user, nmEnvVars);
       }
       // /////////// End of writing out container-script
 
@@ -1171,6 +1177,9 @@ public class ContainerLaunch implements Callable<Integer> {
 
     public abstract void env(String key, String value) throws IOException;
 
+    public abstract void whitelistedEnv(String key, String value)
+        throws IOException;
+
     public abstract void echo(String echoStr) throws IOException;
 
     public final void symlink(Path src, Path dst) throws IOException {
@@ -1291,6 +1300,11 @@ public class ContainerLaunch implements Callable<Integer> {
     }
 
     @Override
+    public void whitelistedEnv(String key, String value) throws IOException {
+      line("export ", key, "=${", key, ":-", "\"", value, "\"}");
+    }
+
+    @Override
     public void echo(final String echoStr) throws IOException {
       line("echo \"" + echoStr + "\"");
     }
@@ -1381,6 +1395,11 @@ public class ContainerLaunch implements Callable<Integer> {
     }
 
     @Override
+    public void whitelistedEnv(String key, String value) throws IOException {
+      env(key, value);
+    }
+
+    @Override
     public void echo(final String echoStr) throws IOException {
       lineWithLenCheck("@echo \"", echoStr, "\"");
     }
@@ -1435,60 +1454,70 @@ public class ContainerLaunch implements Callable<Integer> {
       putEnvIfNotNull(environment, variable, System.getenv(variable));
     }
   }
-  
+
+  private static void addToEnvMap(
+      Map<String, String> envMap, Set<String> envSet,
+      String envName, String envValue) {
+    envMap.put(envName, envValue);
+    envSet.add(envName);
+  }
+
   public void sanitizeEnv(Map<String, String> environment, Path pwd,
       List<Path> appDirs, List<String> userLocalDirs, List<String>
-      containerLogDirs,
-      Map<Path, List<String>> resources,
-      Path nmPrivateClasspathJarDir) throws IOException {
+      containerLogDirs, Map<Path, List<String>> resources,
+      Path nmPrivateClasspathJarDir,
+      Set<String> nmVars) throws IOException {
     /**
      * Non-modifiable environment variables
      */
 
-    environment.put(Environment.CONTAINER_ID.name(), container
-        .getContainerId().toString());
+    addToEnvMap(environment, nmVars, Environment.CONTAINER_ID.name(),
+        container.getContainerId().toString());
 
-    environment.put(Environment.NM_PORT.name(),
+    addToEnvMap(environment, nmVars, Environment.NM_PORT.name(),
       String.valueOf(this.context.getNodeId().getPort()));
 
-    environment.put(Environment.NM_HOST.name(), this.context.getNodeId()
-      .getHost());
+    addToEnvMap(environment, nmVars, Environment.NM_HOST.name(),
+        this.context.getNodeId().getHost());
 
-    environment.put(Environment.NM_HTTP_PORT.name(),
+    addToEnvMap(environment, nmVars, Environment.NM_HTTP_PORT.name(),
       String.valueOf(this.context.getHttpPort()));
 
-    environment.put(Environment.LOCAL_DIRS.name(),
+    addToEnvMap(environment, nmVars, Environment.LOCAL_DIRS.name(),
         StringUtils.join(",", appDirs));
 
-    environment.put(Environment.LOCAL_USER_DIRS.name(), StringUtils.join(",",
-        userLocalDirs));
+    addToEnvMap(environment, nmVars, Environment.LOCAL_USER_DIRS.name(),
+        StringUtils.join(",", userLocalDirs));
 
-    environment.put(Environment.LOG_DIRS.name(),
+    addToEnvMap(environment, nmVars, Environment.LOG_DIRS.name(),
       StringUtils.join(",", containerLogDirs));
 
-    environment.put(Environment.USER.name(), container.getUser());
-    
-    environment.put(Environment.LOGNAME.name(), container.getUser());
+    addToEnvMap(environment, nmVars, Environment.USER.name(),
+        container.getUser());
 
-    environment.put(Environment.HOME.name(),
+    addToEnvMap(environment, nmVars, Environment.LOGNAME.name(),
+        container.getUser());
+
+    addToEnvMap(environment, nmVars, Environment.HOME.name(),
         conf.get(
             YarnConfiguration.NM_USER_HOME_DIR, 
             YarnConfiguration.DEFAULT_NM_USER_HOME_DIR
             )
         );
-    
-    environment.put(Environment.PWD.name(), pwd.toString());
-    
-    putEnvIfAbsent(environment, Environment.HADOOP_CONF_DIR.name());
+
+    addToEnvMap(environment, nmVars, Environment.PWD.name(), pwd.toString());
 
     if (!Shell.WINDOWS) {
-      environment.put("JVM_PID", "$$");
+      addToEnvMap(environment, nmVars, "JVM_PID", "$$");
     }
 
     // variables here will be forced in, even if the container has specified them.
-    Apps.setEnvFromInputString(environment, conf.get(
-      YarnConfiguration.NM_ADMIN_USER_ENV,
-      YarnConfiguration.DEFAULT_NM_ADMIN_USER_ENV), File.pathSeparator);
+    String nmAdminUserEnv = conf.get(
+        YarnConfiguration.NM_ADMIN_USER_ENV,
+        YarnConfiguration.DEFAULT_NM_ADMIN_USER_ENV);
+    Apps.setEnvFromInputString(environment, nmAdminUserEnv, File.pathSeparator);
+    nmVars.addAll(Apps.getEnvVarsFromInputString(nmAdminUserEnv,
+        File.pathSeparator));
 
     // TODO: Remove Windows check and use this approach on all platforms after
     // additional testing.  See YARN-358.
@@ -1502,6 +1531,7 @@ public class ContainerLaunch implements Callable<Integer> {
         .getAuxServiceMetaData().entrySet()) {
       AuxiliaryServiceHelper.setServiceDataIntoEnv(
           meta.getKey(), meta.getValue(), environment);
+      nmVars.add(AuxiliaryServiceHelper.getPrefixServiceName(meta.getKey()));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
index b50d56c..83380ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
@@ -37,7 +37,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.List;
-import java.util.Map;
 
 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*;
 
@@ -74,11 +73,6 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
   }
 
   @Override
-  public boolean useWhitelistEnv(Map<String, String> env) {
-    return true;
-  }
-
-  @Override
   public void prepareContainer(ContainerRuntimeContext ctx)
       throws ContainerExecutionException {
     //nothing to do here at the moment.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java
index dd10617..675bffb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java
@@ -94,17 +94,6 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime {
     }
   }
 
-  @Override
-  public boolean useWhitelistEnv(Map<String, String> env) {
-    try {
-      LinuxContainerRuntime runtime = pickContainerRuntime(env);
-      return runtime.useWhitelistEnv(env);
-    } catch (ContainerExecutionException e) {
-      LOG.debug("Unable to determine runtime");
-      return false;
-    }
-  }
-
   @VisibleForTesting
   LinuxContainerRuntime pickContainerRuntime(
       Map<String, String> environment) throws ContainerExecutionException {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
index 401fc4a..de225e6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
@@ -371,13 +371,6 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
     return capabilities;
   }
 
-  @Override
-  public boolean useWhitelistEnv(Map<String, String> env) {
-    // Avoid propagating nodemanager environment variables into the container
-    // so those variables can be picked up from the Docker image instead.
-    return false;
-  }
-
   private String runDockerVolumeCommand(DockerVolumeCommand dockerVolumeCommand,
       Container container) throws ContainerExecutionException {
     try {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java
index aa294fc..7caa0ed 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java
@@ -24,8 +24,6 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 
-import java.util.Map;
-
 /**
  * An abstraction for various container runtime implementations. Examples
  * include Process Tree, Docker, Appc runtimes etc. These implementations
@@ -85,13 +83,4 @@ public interface ContainerRuntime {
    * and hostname
    */
   String[] getIpAndHost(Container container) throws ContainerExecutionException;
-
-  /**
-   * Whether to propagate the whitelist of environment variables from the
-   * nodemanager environment into the container environment.
-   * @param env the container's environment variables
-   * @return true if whitelist variables should be propagated, false otherwise
-   * @see org.apache.hadoop.yarn.conf.YarnConfiguration#NM_ENV_WHITELIST
-   */
-  boolean useWhitelistEnv(Map<String, String> env);
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8013475d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
index 5923f8e..47e268c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
@@ -41,6 +41,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
@@ -185,8 +186,10 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
       DefaultContainerExecutor defaultContainerExecutor =
           new DefaultContainerExecutor();
       defaultContainerExecutor.setConf(new YarnConfiguration());
+      LinkedHashSet<String> nmVars = new LinkedHashSet<>();
       defaultContainerExecutor.writeLaunchEnv(fos, env, resources, commands,
-          new Path(localLogDir.getAbsolutePath()), "user", tempFile.getName());
+          new Path(localLogDir.getAbsolutePath()), "user", tempFile.getName(),
+          nmVars);
       fos.flush();
       fos.close();
       FileUtil.setExecutable(tempFile, true);
@@ -260,8 +263,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
       DefaultContainerExecutor defaultContainerExecutor =
           new DefaultContainerExecutor();
       defaultContainerExecutor.setConf(new YarnConfiguration());
+      LinkedHashSet<String> nmVars = new LinkedHashSet<>();
       defaultContainerExecutor.writeLaunchEnv(fos, env, resources, commands,
-          new Path(localLogDir.getAbsolutePath()), "user");
+          new Path(localLogDir.getAbsolutePath()), "user", nmVars);
       fos.flush();
       fos.close();
       FileUtil.setExecutable(tempFile, true);
@@ -323,8 +327,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
     conf.set(YarnConfiguration.NM_ENV_WHITELIST,
         "HADOOP_MAPRED_HOME,HADOOP_YARN_HOME");
     defaultContainerExecutor.setConf(conf);
+    LinkedHashSet<String> nmVars = new LinkedHashSet<>();
     defaultContainerExecutor.writeLaunchEnv(fos, env, resources, commands,
-        new Path(localLogDir.getAbsolutePath()), "user");
+        new Path(localLogDir.getAbsolutePath()), "user", nmVars);
     String shellContent =
         new String(Files.readAllBytes(Paths.get(shellFile.getAbsolutePath())),
             StandardCharsets.UTF_8);
@@ -337,7 +342,8 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
     Assert.assertFalse(shellContent.contains("HADOOP_HDFS_HOME"));
     // Available in env and in whitelist
     Assert.assertTrue(shellContent.contains(
-        "export HADOOP_YARN_HOME=\"nodemanager_yarn_home\""));
+        "export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-\"nodemanager_yarn_home\"}"
+      ));
     fos.flush();
     fos.close();
   }
@@ -372,8 +378,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
     conf.set(YarnConfiguration.NM_ENV_WHITELIST,
         "HADOOP_MAPRED_HOME,HADOOP_YARN_HOME");
     lce.setConf(conf);
+    LinkedHashSet<String> nmVars = new LinkedHashSet<>();
     lce.writeLaunchEnv(fos, env, resources, commands,
-        new Path(localLogDir.getAbsolutePath()), "user");
+        new Path(localLogDir.getAbsolutePath()), "user", nmVars);
     String shellContent =
         new String(Files.readAllBytes(Paths.get(shellFile.getAbsolutePath())),
             StandardCharsets.UTF_8);
@@ -382,13 +389,106 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
     // Whitelisted variable overridden by container
     Assert.assertTrue(shellContent.contains(
         "export HADOOP_MAPRED_HOME=\"/opt/hadoopbuild\""));
-    // Verify no whitelisted variables inherited from NM env
+    // Available in env but not in whitelist
     Assert.assertFalse(shellContent.contains("HADOOP_HDFS_HOME"));
-    Assert.assertFalse(shellContent.contains("HADOOP_YARN_HOME"));
+    // Available in env and in whitelist
+    Assert.assertTrue(shellContent.contains(
+        "export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-\"nodemanager_yarn_home\"}"
+    ));
+    fos.flush();
+    fos.close();
+  }
+
+  @Test(timeout = 20000)
+  public void testWriteEnvOrder() throws Exception {
+    // Valid only for unix
+    assumeNotWindows();
+    List<String> commands = new ArrayList<String>();
+
+    // Setup user-defined environment
+    Map<String, String> env = new HashMap<String, String>();
+    env.put("USER_VAR_1", "1");
+    env.put("USER_VAR_2", "2");
+    env.put("NM_MODIFIED_VAR_1", "nm 1");
+    env.put("NM_MODIFIED_VAR_2", "nm 2");
+
+    // These represent vars explicitly set by NM
+    LinkedHashSet<String> trackedNmVars = new LinkedHashSet<>();
+    trackedNmVars.add("NM_MODIFIED_VAR_1");
+    trackedNmVars.add("NM_MODIFIED_VAR_2");
+
+    // Setup Nodemanager environment
+    final Map<String, String> nmEnv = new HashMap<>();
+    nmEnv.put("WHITELIST_VAR_1", "wl 1");
+    nmEnv.put("WHITELIST_VAR_2", "wl 2");
+    nmEnv.put("NON_WHITELIST_VAR_1", "nwl 1");
+    nmEnv.put("NON_WHITELIST_VAR_2", "nwl 2");
+    DefaultContainerExecutor defaultContainerExecutor =
+        new DefaultContainerExecutor() {
+          @Override
+          protected String getNMEnvVar(String varname) {
+            return nmEnv.get(varname);
+          }
+        };
+
+    // Setup conf with whitelisted variables
+    ArrayList<String> whitelistVars = new ArrayList<>();
+    whitelistVars.add("WHITELIST_VAR_1");
+    whitelistVars.add("WHITELIST_VAR_2");
+    YarnConfiguration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_ENV_WHITELIST,
+        whitelistVars.get(0) + "," + whitelistVars.get(1));
+
+    // These are in the NM env, but not in the whitelist.
+    ArrayList<String> nonWhiteListEnv = new ArrayList<>();
+    nonWhiteListEnv.add("NON_WHITELIST_VAR_1");
+    nonWhiteListEnv.add("NON_WHITELIST_VAR_2");
+
+    // Write the launch script
+    File shellFile = Shell.appendScriptExtension(tmpDir, "hello");
+    Map<Path, List<String>> resources = new HashMap<Path, List<String>>();
+    FileOutputStream fos = new FileOutputStream(shellFile);
+    defaultContainerExecutor.setConf(conf);
+    defaultContainerExecutor.writeLaunchEnv(fos, env, resources, commands,
+        new Path(localLogDir.getAbsolutePath()), "user", trackedNmVars);
     fos.flush();
     fos.close();
+
+    // Examine the script
+    String shellContent =
+        new String(Files.readAllBytes(Paths.get(shellFile.getAbsolutePath())),
+            StandardCharsets.UTF_8);
+    // First make sure everything is there that's supposed to be
+    for (String envVar : env.keySet()) {
+      Assert.assertTrue(shellContent.contains(envVar + "="));
+    }
+    for (String wlVar : whitelistVars) {
+      Assert.assertTrue(shellContent.contains(wlVar + "="));
+    }
+    for (String nwlVar : nonWhiteListEnv) {
+      Assert.assertFalse(shellContent.contains(nwlVar + "="));
+    }
+    // Explicitly set NM vars should come before user vars
+    for (String nmVar : trackedNmVars) {
+      for (String userVar : env.keySet()) {
+        // Need to skip nm vars and whitelist vars
+        if (!trackedNmVars.contains(userVar) &&
+            !whitelistVars.contains(userVar)) {
+          Assert.assertTrue(shellContent.indexOf(nmVar + "=") <
+              shellContent.indexOf(userVar + "="));
+        }
+      }
+    }
+    // Whitelisted vars should be before explicitly set NM vars
+    for (String wlVar : whitelistVars) {
+      for (String nmVar : trackedNmVars) {
+        Assert.assertTrue(shellContent.indexOf(wlVar + "=") <
+            shellContent.indexOf(nmVar + "="));
+      }
+    }
   }
 
+
   @Test (timeout = 20000)
   public void testInvalidEnvSyntaxDiagnostics() throws IOException  {
 
@@ -410,8 +510,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
       DefaultContainerExecutor defaultContainerExecutor =
           new DefaultContainerExecutor();
       defaultContainerExecutor.setConf(new YarnConfiguration());
+      LinkedHashSet<String> nmVars = new LinkedHashSet<>();
       defaultContainerExecutor.writeLaunchEnv(fos, env, resources, commands,
-          new Path(localLogDir.getAbsolutePath()), "user");
+          new Path(localLogDir.getAbsolutePath()), "user", nmVars);
       fos.flush();
       fos.close();
 
@@ -493,8 +594,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
       commands.add(command);
       ContainerExecutor exec = new DefaultContainerExecutor();
       exec.setConf(new YarnConfiguration());
+      LinkedHashSet<String> nmVars = new LinkedHashSet<>();
       exec.writeLaunchEnv(fos, env, resources, commands,
-          new Path(localLogDir.getAbsolutePath()), "user");
+          new Path(localLogDir.getAbsolutePath()), "user", nmVars);
       fos.flush();
       fos.close();
 
@@ -585,7 +687,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
     Path nmp = new Path(testDir);
 
     launch.sanitizeEnv(userSetEnv, pwd, appDirs, userLocalDirs, containerLogs,
-        resources, nmp);
+        resources, nmp, Collections.emptySet());
 
     List<String> result =
       getJarManifestClasspath(userSetEnv.get(Environment.CLASSPATH.name()));
@@ -604,7 +706,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
         dispatcher, exec, null, container, dirsHandler, containerManager);
 
     launch.sanitizeEnv(userSetEnv, pwd, appDirs, userLocalDirs, containerLogs,
-        resources, nmp);
+        resources, nmp, Collections.emptySet());
 
     result =
       getJarManifestClasspath(userSetEnv.get(Environment.CLASSPATH.name()));
@@ -1528,9 +1630,10 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
         FileOutputStream fos = new FileOutputStream(tempFile);
         ContainerExecutor exec = new DefaultContainerExecutor();
         exec.setConf(conf);
+        LinkedHashSet<String> nmVars = new LinkedHashSet<>();
         exec.writeLaunchEnv(fos, env, resources, commands,
             new Path(localLogDir.getAbsolutePath()), "user",
-            tempFile.getName());
+            tempFile.getName(), nmVars);
         fos.flush();
         fos.close();
         FileUtil.setExecutable(tempFile, true);
@@ -1753,8 +1856,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
       List<String> commands = new ArrayList<String>();
       DefaultContainerExecutor executor = new DefaultContainerExecutor();
       executor.setConf(new Configuration());
+      LinkedHashSet<String> nmVars = new LinkedHashSet<>();
       executor.writeLaunchEnv(fos, env, resources, commands,
-          new Path(localLogDir.getAbsolutePath()), user);
+          new Path(localLogDir.getAbsolutePath()), user, nmVars);
       fos.flush();
       fos.close();
 
@@ -1798,8 +1902,9 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
       Configuration execConf = new Configuration();
       execConf.setBoolean(YarnConfiguration.NM_LOG_CONTAINER_DEBUG_INFO, false);
       executor.setConf(execConf);
+      LinkedHashSet<String> nmVars = new LinkedHashSet<>();
       executor.writeLaunchEnv(fos, env, resources, commands,
-          new Path(localLogDir.getAbsolutePath()), user);
+          new Path(localLogDir.getAbsolutePath()), user, nmVars);
       fos.flush();
       fos.close();
 




[10/21] hadoop git commit: HDFS-13112. Token expiration edits may cause log corruption or deadlock. Contributed by Daryn Sharp.

Posted by ha...@apache.org.
HDFS-13112. Token expiration edits may cause log corruption or deadlock. Contributed by Daryn Sharp.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/47473952
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/47473952
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/47473952

Branch: refs/heads/HDFS-12996
Commit: 47473952e56b0380147d42f4110ad03c2276c961
Parents: a53d62a
Author: Kihwal Lee <ki...@apache.org>
Authored: Thu Feb 15 15:32:42 2018 -0600
Committer: Kihwal Lee <ki...@apache.org>
Committed: Thu Feb 15 15:32:42 2018 -0600

----------------------------------------------------------------------
 .../DelegationTokenSecretManager.java           | 53 ++++++++++++++------
 .../hdfs/server/namenode/FSNamesystem.java      | 17 ++++---
 .../hdfs/server/namenode/FSNamesystemLock.java  |  7 +++
 .../org/apache/hadoop/hdfs/util/RwLock.java     |  5 +-
 .../namenode/TestSecurityTokenEditLog.java      | 24 ++++++++-
 5 files changed, 83 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
index b7f89a8..3547c96 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hdfs.security.token.delegation;
 import java.io.DataInput;
 import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.InterruptedIOException;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -366,34 +365,58 @@ public class DelegationTokenSecretManager
   @Override //AbstractDelegationTokenManager
   protected void logUpdateMasterKey(DelegationKey key)
       throws IOException {
-    synchronized (noInterruptsLock) {
+    try {
       // The edit logging code will fail catastrophically if it
       // is interrupted during a logSync, since the interrupt
       // closes the edit log files. Doing this inside the
-      // above lock and then checking interruption status
-      // prevents this bug.
-      if (Thread.interrupted()) {
-        throw new InterruptedIOException(
-            "Interrupted before updating master key");
+      // fsn lock will prevent being interrupted when stopping
+      // the secret manager.
+      namesystem.readLockInterruptibly();
+      try {
+        // this monitor isn't necessary if stopped while holding write lock
+        // but for safety, guard against a stop with read lock.
+        synchronized (noInterruptsLock) {
+          if (Thread.currentThread().isInterrupted()) {
+            return; // leave flag set so secret monitor exits.
+          }
+          namesystem.logUpdateMasterKey(key);
+        }
+      } finally {
+        namesystem.readUnlock();
       }
-      namesystem.logUpdateMasterKey(key);
+    } catch (InterruptedException ie) {
+      // AbstractDelegationTokenManager may crash if an exception is thrown.
+      // The interrupt flag will be detected when it attempts to sleep.
+      Thread.currentThread().interrupt();
     }
   }
   
   @Override //AbstractDelegationTokenManager
   protected void logExpireToken(final DelegationTokenIdentifier dtId)
       throws IOException {
-    synchronized (noInterruptsLock) {
+    try {
       // The edit logging code will fail catastrophically if it
       // is interrupted during a logSync, since the interrupt
       // closes the edit log files. Doing this inside the
-      // above lock and then checking interruption status
-      // prevents this bug.
-      if (Thread.interrupted()) {
-        throw new InterruptedIOException(
-            "Interrupted before expiring delegation token");
+      // fsn lock will prevent being interrupted when stopping
+      // the secret manager.
+      namesystem.readLockInterruptibly();
+      try {
+        // this monitor isn't necessary if stopped while holding write lock
+        // but for safety, guard against a stop with read lock.
+        synchronized (noInterruptsLock) {
+          if (Thread.currentThread().isInterrupted()) {
+            return; // leave flag set so secret monitor exits.
+          }
+          namesystem.logExpireDelegationToken(dtId);
+        }
+      } finally {
+        namesystem.readUnlock();
       }
-      namesystem.logExpireDelegationToken(dtId);
+    } catch (InterruptedException ie) {
+      // AbstractDelegationTokenManager may crash if an exception is thrown.
+      // The interrupt flag will be detected when it attempts to sleep.
+      Thread.currentThread().interrupt();
     }
   }
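
For readers following the interleaved +/- lines above, here is the new logUpdateMasterKey flow reassembled from the hunk as a readability sketch (trimmed to the relevant lines and not part of the patch itself); logExpireToken follows the same shape:

  @Override //AbstractDelegationTokenManager
  protected void logUpdateMasterKey(DelegationKey key) throws IOException {
    try {
      // Take the FSN read lock interruptibly, so stopping the secret manager
      // cannot interrupt a thread that is inside logSync.
      namesystem.readLockInterruptibly();
      try {
        // The monitor guards against a stop that only holds the read lock.
        synchronized (noInterruptsLock) {
          if (Thread.currentThread().isInterrupted()) {
            return;  // leave the flag set so the secret monitor exits
          }
          namesystem.logUpdateMasterKey(key);  // edit log write under the read lock
        }
      } finally {
        namesystem.readUnlock();
      }
    } catch (InterruptedException ie) {
      // Restore the interrupt flag instead of throwing, so the token manager
      // thread notices it on its next sleep.
      Thread.currentThread().interrupt();
    }
  }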
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 6c27d7e..b0973a9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1580,6 +1580,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     this.fsLock.readLock();
   }
   @Override
+  public void readLockInterruptibly() throws InterruptedException {
+    this.fsLock.readLockInterruptibly();
+  }
+  @Override
   public void readUnlock() {
     this.fsLock.readUnlock();
   }
@@ -5675,9 +5679,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     assert !isInSafeMode() :
       "this should never be called while in safemode, since we stop " +
       "the DT manager before entering safemode!";
-    // No need to hold FSN lock since we don't access any internal
-    // structures, and this is stopped before the FSN shuts itself
-    // down, etc.
+    // edit log rolling is not thread-safe and must be protected by the
+    // fsn lock.  not updating namespace so read lock is sufficient.
+    assert hasReadLock();
     getEditLog().logUpdateMasterKey(key);
     getEditLog().logSync();
   }
@@ -5691,9 +5695,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     assert !isInSafeMode() :
       "this should never be called while in safemode, since we stop " +
       "the DT manager before entering safemode!";
-    // No need to hold FSN lock since we don't access any internal
-    // structures, and this is stopped before the FSN shuts itself
-    // down, etc.
+    // edit log rolling is not thread-safe and must be protected by the
+    // fsn lock.  not updating namespace so read lock is sufficient.
+    assert hasReadLock();
+    // do not logSync so expiration edits are batched
     getEditLog().logCancelDelegationToken(id);
   }  
   

http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
index 32c7efa..900f8a2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
@@ -145,6 +145,13 @@ class FSNamesystemLock {
     }
   }
 
+  public void readLockInterruptibly() throws InterruptedException {
+    coarseLock.readLock().lockInterruptibly();
+    if (coarseLock.getReadHoldCount() == 1) {
+      readLockHeldTimeStampNanos.set(timer.monotonicNowNanos());
+    }
+  }
+
   public void readUnlock() {
     readUnlock(OP_NAME_OTHER);
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
index e36f0f7..deaeaa4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java
@@ -21,7 +21,10 @@ package org.apache.hadoop.hdfs.util;
 public interface RwLock {
   /** Acquire read lock. */
   public void readLock();
-  
+
+  /** Acquire read lock, unless interrupted while waiting. */
+  void readLockInterruptibly() throws InterruptedException;
+
   /** Release read lock. */
   public void readUnlock();
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/47473952/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java
index 5aa19bb..c43c909 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java
@@ -24,6 +24,7 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.util.Iterator;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -37,7 +38,11 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
+import org.junit.Assert;
 import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
 import static org.mockito.Mockito.*;
 
 /**
@@ -180,8 +185,25 @@ public class TestSecurityTokenEditLog {
     Text renewer = new Text(UserGroupInformation.getCurrentUser().getUserName());
     FSImage fsImage = mock(FSImage.class);
     FSEditLog log = mock(FSEditLog.class);
-    doReturn(log).when(fsImage).getEditLog();   
+    doReturn(log).when(fsImage).getEditLog();
+    // verify that the namesystem read lock is held while logging token
+    // expirations.  the namesystem is not updated, so write lock is not
+    // necessary, but the lock is required because edit log rolling is not
+    // thread-safe.
+    final AtomicReference<FSNamesystem> fsnRef = new AtomicReference<>();
+    doAnswer(
+      new Answer<Void>() {
+        @Override
+        public Void answer(InvocationOnMock invocation) throws Throwable {
+          // fsn claims read lock if either read or write locked.
+          Assert.assertTrue(fsnRef.get().hasReadLock());
+          Assert.assertFalse(fsnRef.get().hasWriteLock());
+          return null;
+        }
+      }
+    ).when(log).logCancelDelegationToken(any(DelegationTokenIdentifier.class));
     FSNamesystem fsn = new FSNamesystem(conf, fsImage);
+    fsnRef.set(fsn);
     
     DelegationTokenSecretManager dtsm = fsn.getDelegationTokenSecretManager();
     try {




[02/21] hadoop git commit: HDFS-13142. Define and Implement a DiffList Interface to store and manage SnapshotDiffs. Contributed by Shashikant Banerjee

Posted by ha...@apache.org.
HDFS-13142. Define and Implement a DiffList Interface to store and manage SnapshotDiffs. Contributed by Shashikant Banerjee


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6ea7d78c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6ea7d78c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6ea7d78c

Branch: refs/heads/HDFS-12996
Commit: 6ea7d78ccb0d1c4af9bcac02a4cff89bdffff252
Parents: 8f66aff
Author: Tsz-Wo Nicholas Sze <sz...@hortonworks.com>
Authored: Thu Feb 15 19:33:44 2018 +0800
Committer: Tsz-Wo Nicholas Sze <sz...@hortonworks.com>
Committed: Thu Feb 15 19:33:44 2018 +0800

----------------------------------------------------------------------
 .../hadoop/hdfs/server/namenode/INodeFile.java  |   3 +-
 .../snapshot/AbstractINodeDiffList.java         |  27 ++--
 .../hdfs/server/namenode/snapshot/DiffList.java | 140 +++++++++++++++++++
 .../namenode/snapshot/DiffListByArrayList.java  |  80 +++++++++++
 .../snapshot/DirectoryWithSnapshotFeature.java  |  10 +-
 .../snapshot/FSImageFormatPBSnapshot.java       |   4 +-
 .../server/namenode/snapshot/FileDiffList.java  |  11 +-
 .../snapshot/FileWithSnapshotFeature.java       |   2 +-
 .../snapshot/SnapshotFSImageFormat.java         |   4 +-
 .../namenode/TestFSImageWithSnapshot.java       |   3 +-
 .../snapshot/TestRenameWithSnapshots.java       |  40 +++---
 .../snapshot/TestSetQuotaWithSnapshot.java      |   3 +-
 .../namenode/snapshot/TestSnapshotRename.java   |   3 +-
 13 files changed, 276 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
index 90659f3..6693297 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiff;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileWithSnapshotFeature;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.DiffList;
 import org.apache.hadoop.hdfs.util.LongBitFormat;
 import org.apache.hadoop.util.StringUtils;
 import static org.apache.hadoop.io.erasurecode.ErasureCodeConstants.REPLICATION_POLICY_ID;
@@ -988,7 +989,7 @@ public class INodeFile extends INodeWithAdditionalFields
     } else {
       // Collect all distinct blocks
       Set<BlockInfo> allBlocks = new HashSet<>(Arrays.asList(getBlocks()));
-      List<FileDiff> diffs = sf.getDiffs().asList();
+      DiffList<FileDiff> diffs = sf.getDiffs().asList();
       for(FileDiff diff : diffs) {
         BlockInfo[] diffBlocks = diff.getBlocks();
         if (diffBlocks != null) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java
index 98d8c53..8f2465a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@@ -36,14 +35,15 @@ abstract class AbstractINodeDiffList<N extends INode,
                                      A extends INodeAttributes,
                                      D extends AbstractINodeDiff<N, A, D>> 
     implements Iterable<D> {
+
   /** Diff list sorted by snapshot IDs, i.e. in chronological order.
     * Created lazily to avoid wasting memory by empty lists. */
-  private List<D> diffs;
+  private DiffList<D> diffs;
 
   /** @return this list as a unmodifiable {@link List}. */
-  public final List<D> asList() {
+  public final DiffList<D> asList() {
     return diffs != null ?
-        Collections.unmodifiableList(diffs) : Collections.emptyList();
+        DiffList.unmodifiableList(diffs) : DiffList.emptyList();
   }
   
   /** Clear the list. */
@@ -72,7 +72,7 @@ abstract class AbstractINodeDiffList<N extends INode,
     if (diffs == null) {
       return;
     }
-    int snapshotIndex = Collections.binarySearch(diffs, snapshot);
+    int snapshotIndex = diffs.binarySearch(snapshot);
 
     D removed;
     if (snapshotIndex == 0) {
@@ -114,7 +114,7 @@ abstract class AbstractINodeDiffList<N extends INode,
   private D addLast(D diff) {
     createDiffsIfNeeded();
     final D last = getLast();
-    diffs.add(diff);
+    diffs.addLast(diff);
     if (last != null) {
       last.setPosterior(diff);
     }
@@ -125,7 +125,7 @@ abstract class AbstractINodeDiffList<N extends INode,
   final void addFirst(D diff) {
     createDiffsIfNeeded();
     final D first = diffs.isEmpty()? null : diffs.get(0);
-    diffs.add(0, diff);
+    diffs.addFirst(diff);
     diff.setPosterior(first);
   }
 
@@ -140,7 +140,8 @@ abstract class AbstractINodeDiffList<N extends INode,
 
   private void createDiffsIfNeeded() {
     if (diffs == null) {
-      diffs = new ArrayList<>(INodeDirectory.DEFAULT_FILES_PER_DIRECTORY);
+      diffs =
+          new DiffListByArrayList<>(INodeDirectory.DEFAULT_FILES_PER_DIRECTORY);
     }
   }
 
@@ -169,7 +170,7 @@ abstract class AbstractINodeDiffList<N extends INode,
       }
       return last;
     }
-    final int i = Collections.binarySearch(diffs, anchorId);
+    final int i = diffs.binarySearch(anchorId);
     if (exclusive) { // must be the one before
       if (i == -1 || i == 0) {
         return Snapshot.NO_SNAPSHOT_ID;
@@ -208,7 +209,7 @@ abstract class AbstractINodeDiffList<N extends INode,
     if (snapshotId == Snapshot.CURRENT_STATE_ID || diffs == null) {
       return null;
     }
-    final int i = Collections.binarySearch(diffs, snapshotId);
+    final int i = diffs.binarySearch(snapshotId);
     if (i >= 0) {
       // exact match
       return diffs.get(i);
@@ -242,9 +243,9 @@ abstract class AbstractINodeDiffList<N extends INode,
     }
 
     final int size = diffs.size();
-    int earlierDiffIndex = Collections.binarySearch(diffs, earlier.getId());
-    int laterDiffIndex = later == null ? size : Collections
-        .binarySearch(diffs, later.getId());
+    int earlierDiffIndex = diffs.binarySearch(earlier.getId());
+    int laterDiffIndex = later == null ? size
+        : diffs.binarySearch(later.getId());
     if (-earlierDiffIndex - 1 == size) {
       // if the earlierSnapshot is after the latest SnapshotDiff stored in
       // diffs, no modification happened after the earlierSnapshot

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java
new file mode 100644
index 0000000..82fd3f9
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+/**
+ * This interface defines the methods used to store and manage INode diffs.
+ * @param <T> Type of the object in this list.
+ */
+public interface DiffList<T extends Comparable<Integer>> extends Iterable<T> {
+  DiffList EMPTY_LIST = new DiffListByArrayList(Collections.emptyList());
+
+  /**
+   * Returns an empty DiffList.
+   */
+  static <T extends Comparable<Integer>> DiffList<T> emptyList() {
+    return EMPTY_LIST;
+  }
+
+  /**
+   * Returns an unmodifiable diffList.
+   * @param diffs DiffList
+   * @param <T> Type of the object in the diffList
+   * @return Unmodifiable diffList
+   */
+  static <T extends Comparable<Integer>> DiffList<T> unmodifiableList(
+      DiffList<T> diffs) {
+    return new DiffList<T>() {
+      @Override
+      public T get(int i) {
+        return diffs.get(i);
+      }
+
+      @Override
+      public boolean isEmpty() {
+        return diffs.isEmpty();
+      }
+
+      @Override
+      public int size() {
+        return diffs.size();
+      }
+
+      @Override
+      public T remove(int i) {
+        throw new UnsupportedOperationException("This list is unmodifiable.");
+      }
+
+      @Override
+      public boolean addLast(T t) {
+        throw new UnsupportedOperationException("This list is unmodifiable.");
+      }
+
+      @Override
+      public void addFirst(T t) {
+        throw new UnsupportedOperationException("This list is unmodifiable.");
+      }
+
+      @Override
+      public int binarySearch(int i) {
+        return diffs.binarySearch(i);
+      }
+
+      @Override
+      public Iterator<T> iterator() {
+        return diffs.iterator();
+      }
+    };
+  }
+
+  /**
+   * Returns the element at the specified position in this list.
+   *
+   * @param index index of the element to return
+   * @return the element at the specified position in this list
+   * @throws IndexOutOfBoundsException if the index is out of range
+   *         (<tt>index &lt; 0 || index &gt;= size()</tt>)
+   */
+  T get(int index);
+
+  /**
+   * Returns true if this list contains no elements.
+   *
+   * @return true if this list contains no elements
+   */
+  boolean isEmpty();
+
+  /**
+   * Returns the number of elements in this list.
+   * @return the number of elements in this list.
+   */
+  int size();
+
+  /**
+   * Removes the element at the specified position in this list.
+   * @param index the index of the element to be removed
+   * @return the element previously at the specified position
+   */
+  T remove(int index);
+
+  /**
+   * Adds an element at the end of the list.
+   * @param t element to be appended to this list
+   * @return true, if insertion is successful
+   */
+  boolean addLast(T t);
+
+  /**
+   * Adds an element at the beginning of the list.
+   * @param t element to be added to this list
+   */
+  void addFirst(T t);
+
+  /**
+   * Searches the list for the specified object using the binary
+   * search algorithm.
+   * @param key key to be searched for
+   * @return the index of the search key, if it is contained in the list
+   *         otherwise, (-insertion point - 1).
+   */
+  int binarySearch(int key);
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListByArrayList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListByArrayList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListByArrayList.java
new file mode 100644
index 0000000..03aa5c2
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListByArrayList.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Resizable-array implementation of the DiffList interface.
+ * @param <T> Type of the object in the list
+ */
+public class DiffListByArrayList<T extends Comparable<Integer>>
+    implements DiffList<T> {
+  private final List<T> list;
+
+  DiffListByArrayList(List<T> list) {
+    this.list = list;
+  }
+
+  public DiffListByArrayList(int initialCapacity) {
+    this(new ArrayList<>(initialCapacity));
+  }
+
+  @Override
+  public T get(int i) {
+    return list.get(i);
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return list.isEmpty();
+  }
+
+  @Override
+  public int size() {
+    return list.size();
+  }
+
+  @Override
+  public T remove(int i) {
+    return list.remove(i);
+  }
+
+  @Override
+  public boolean addLast(T t) {
+    return list.add(t);
+  }
+
+  @Override
+  public void addFirst(T t) {
+    list.add(0, t);
+  }
+
+  @Override
+  public int binarySearch(int i) {
+    return Collections.binarySearch(list, i);
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+    return list.iterator();
+  }
+}
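
Because the (-insertion point - 1) contract drives the index arithmetic in AbstractINodeDiffList above and FileDiffList below (for example the "-i - 2" step back to the last diff at or before a snapshot id), a small standalone sketch may help. IdDiff is a hypothetical stand-in used only for illustration; in the NameNode the elements are FileDiff and DirectoryDiff, which compare themselves against a snapshot id. This is not part of the patch:

  // Hypothetical element type for illustration only.
  class IdDiff implements Comparable<Integer> {
    private final int snapshotId;
    IdDiff(int snapshotId) { this.snapshotId = snapshotId; }
    @Override
    public int compareTo(Integer id) { return Integer.compare(snapshotId, id); }
  }

  DiffList<IdDiff> diffs = new DiffListByArrayList<>(4);
  diffs.addLast(new IdDiff(1));
  diffs.addLast(new IdDiff(3));

  int hit  = diffs.binarySearch(3);  // returns 1: exact match at index 1
  int miss = diffs.binarySearch(2);  // returns -2: (-insertionPoint - 1), insertionPoint == 1
  int lastAtOrBefore = -miss - 2;    // 0: the "-i - 2" conversion FileDiffList applies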

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
index 7535879..8ed9c7a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
@@ -225,7 +225,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
         private List<INode> initChildren() {
           if (children == null) {
             final ChildrenDiff combined = new ChildrenDiff();
-            for (DirectoryDiff d = DirectoryDiff.this; d != null; 
+            for (DirectoryDiff d = DirectoryDiff.this; d != null;
                 d = d.getPosterior()) {
               combined.combinePosterior(d.diff, null);
             }
@@ -334,7 +334,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
     /** Replace the given child in the created/deleted list, if there is any. */
     public boolean replaceChild(final ListType type, final INode oldChild,
         final INode newChild) {
-      final List<DirectoryDiff> diffList = asList();
+      final DiffList<DirectoryDiff> diffList = asList();
       for(int i = diffList.size() - 1; i >= 0; i--) {
         final ChildrenDiff diff = diffList.get(i).diff;
         if (diff.replace(type, oldChild, newChild)) {
@@ -346,7 +346,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
 
     /** Remove the given child in the created/deleted list, if there is any. */
     public boolean removeChild(final ListType type, final INode child) {
-      final List<DirectoryDiff> diffList = asList();
+      final DiffList<DirectoryDiff> diffList = asList();
       for(int i = diffList.size() - 1; i >= 0; i--) {
         final ChildrenDiff diff = diffList.get(i).diff;
         if (diff.removeChild(type, child)) {
@@ -363,7 +363,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
      * given inode is not in any of the snapshot.
      */
     public int findSnapshotDeleted(final INode child) {
-      final List<DirectoryDiff> diffList = asList();
+      final DiffList<DirectoryDiff> diffList = asList();
       for(int i = diffList.size() - 1; i >= 0; i--) {
         final ChildrenDiff diff = diffList.get(i).diff;
         final int d = diff.searchIndex(ListType.DELETED,
@@ -669,7 +669,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
 
     boolean dirMetadataChanged = false;
     INodeDirectoryAttributes dirCopy = null;
-    List<DirectoryDiff> difflist = diffs.asList();
+    DiffList<DirectoryDiff> difflist = diffs.asList();
     for (int i = earlierDiffIndex; i < laterDiffIndex; i++) {
       DirectoryDiff sdiff = difflist.get(i);
       diff.combinePosterior(sdiff.diff, null);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
index f31743a..4b619a4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
@@ -523,7 +523,7 @@ public class FSImageFormatPBSnapshot {
         throws IOException {
       FileWithSnapshotFeature sf = file.getFileWithSnapshotFeature();
       if (sf != null) {
-        List<FileDiff> diffList = sf.getDiffs().asList();
+        DiffList<FileDiff> diffList = sf.getDiffs().asList();
         SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry
             .newBuilder().setInodeId(file.getId()).setType(Type.FILEDIFF)
             .setNumOfDiff(diffList.size()).build();
@@ -563,7 +563,7 @@ public class FSImageFormatPBSnapshot {
         throws IOException {
       DirectoryWithSnapshotFeature sf = dir.getDirectoryWithSnapshotFeature();
       if (sf != null) {
-        List<DirectoryDiff> diffList = sf.getDiffs().asList();
+        DiffList<DirectoryDiff> diffList = sf.getDiffs().asList();
         SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry
             .newBuilder().setInodeId(dir.getId()).setType(Type.DIRECTORYDIFF)
             .setNumOfDiff(diffList.size()).build();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java
index 9dcd4d8..2c04a49 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java
@@ -17,9 +17,6 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import java.util.Collections;
-import java.util.List;
-
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
@@ -66,8 +63,8 @@ public class FileDiffList extends
     if (snapshotId == Snapshot.CURRENT_STATE_ID) {
       return null;
     }
-    List<FileDiff> diffs = this.asList();
-    int i = Collections.binarySearch(diffs, snapshotId);
+    DiffList<FileDiff> diffs = this.asList();
+    int i = diffs.binarySearch(snapshotId);
     BlockInfo[] blocks = null;
     for(i = i >= 0 ? i : -i-2; i >= 0; i--) {
       blocks = diffs.get(i).getBlocks();
@@ -83,8 +80,8 @@ public class FileDiffList extends
     if (snapshotId == Snapshot.CURRENT_STATE_ID) {
       return null;
     }
-    List<FileDiff> diffs = this.asList();
-    int i = Collections.binarySearch(diffs, snapshotId);
+    DiffList<FileDiff> diffs = this.asList();
+    int i = diffs.binarySearch(snapshotId);
     BlockInfo[] blocks = null;
     for (i = i >= 0 ? i+1 : -i-1; i < diffs.size(); i++) {
       blocks = diffs.get(i).getBlocks();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
index b52e8d6..80061c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java
@@ -86,7 +86,7 @@ public class FileWithSnapshotFeature implements INode.Feature {
     int earlierDiffIndex = diffIndexPair[0];
     int laterDiffIndex = diffIndexPair[1];
 
-    final List<FileDiff> diffList = diffs.asList();
+    final DiffList<FileDiff> diffList = diffs.asList();
     final long earlierLength = diffList.get(earlierDiffIndex).getFileSize();
     final long laterLength = laterDiffIndex == diffList.size() ? file
         .computeFileSize(true, false) : diffList.get(laterDiffIndex)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
index fcab53a..d1ae293 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
@@ -82,7 +82,7 @@ public class SnapshotFSImageFormat {
     if (diffs == null) {
       out.writeInt(-1); // no diffs
     } else {
-      final List<D> list = diffs.asList();
+      final DiffList<D> list = diffs.asList();
       final int size = list.size();
       out.writeInt(size);
       for (int i = size - 1; i >= 0; i--) {
@@ -306,7 +306,7 @@ public class SnapshotFSImageFormat {
     List<INode> deletedList = loadDeletedList(parent, createdList, in, loader);
     
     // 6. Compose the SnapshotDiff
-    List<DirectoryDiff> diffs = parent.getDiffs().asList();
+    DiffList<DirectoryDiff> diffs = parent.getDiffs().asList();
     DirectoryDiff sdiff = new DirectoryDiff(snapshot.getId(), snapshotINode,
         diffs.isEmpty() ? null : diffs.get(0), childrenSize, createdList,
         deletedList, snapshotINode == snapshot.getRoot());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
index 82f5cfb..58ecc8a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.DiffList;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
@@ -199,7 +200,7 @@ public class TestFSImageWithSnapshot {
     assertTrue("The children list of root should be empty", 
         rootNode.getChildrenList(Snapshot.CURRENT_STATE_ID).isEmpty());
     // one snapshot on root: s1
-    List<DirectoryDiff> diffList = rootNode.getDiffs().asList();
+    DiffList<DirectoryDiff> diffList = rootNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     Snapshot s1 = rootNode.getSnapshot(DFSUtil.string2Bytes("s1"));
     assertEquals(s1.getId(), diffList.get(0).getSnapshotId());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
index 91eec78..770651e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java
@@ -983,7 +983,7 @@ public class TestRenameWithSnapshots {
     // 5 references: s1, s22, s333, s2222, current tree of sdir1
     assertEquals(5, fooWithCount.getReferenceCount());
     INodeDirectory foo = fooWithCount.asDirectory();
-    List<DirectoryDiff> fooDiffs = foo.getDiffs().asList();
+    DiffList<DirectoryDiff> fooDiffs = foo.getDiffs().asList();
     assertEquals(4, fooDiffs.size());
     
     Snapshot s2222 = sdir2Node.getSnapshot(DFSUtil.string2Bytes("s2222"));
@@ -996,7 +996,7 @@ public class TestRenameWithSnapshots {
     assertEquals(s22.getId(), fooDiffs.get(1).getSnapshotId());
     assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
     INodeFile bar1 = fsdir.getINode4Write(bar1_dir1.toString()).asFile();
-    List<FileDiff> bar1Diffs = bar1.getDiffs().asList();
+    DiffList<FileDiff> bar1Diffs = bar1.getDiffs().asList();
     assertEquals(3, bar1Diffs.size());
     assertEquals(s333.getId(), bar1Diffs.get(2).getSnapshotId());
     assertEquals(s22.getId(), bar1Diffs.get(1).getSnapshotId());
@@ -1008,7 +1008,7 @@ public class TestRenameWithSnapshots {
     // 5 references: s1, s22, s333, s2222, current tree of sdir1
     assertEquals(5, barWithCount.getReferenceCount());
     INodeFile bar = barWithCount.asFile();
-    List<FileDiff> barDiffs = bar.getDiffs().asList();
+    DiffList<FileDiff> barDiffs = bar.getDiffs().asList();
     assertEquals(4, barDiffs.size());
     assertEquals(s2222.getId(), barDiffs.get(3).getSnapshotId());
     assertEquals(s333.getId(), barDiffs.get(2).getSnapshotId());
@@ -1188,7 +1188,7 @@ public class TestRenameWithSnapshots {
     INodeReference.WithCount fooWC = (WithCount) fooRef.getReferredINode();
     assertEquals(1, fooWC.getReferenceCount());
     INodeDirectory fooDir = fooWC.getReferredINode().asDirectory();
-    List<DirectoryDiff> diffs = fooDir.getDiffs().asList();
+    DiffList<DirectoryDiff> diffs = fooDir.getDiffs().asList();
     assertEquals(1, diffs.size());
     assertEquals(s2.getId(), diffs.get(0).getSnapshotId());
     
@@ -1294,7 +1294,7 @@ public class TestRenameWithSnapshots {
         .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir1Children.size());
     assertEquals(foo.getName(), dir1Children.get(0).getLocalName());
-    List<DirectoryDiff> dir1Diffs = dir1Node.getDiffs().asList();
+    DiffList<DirectoryDiff> dir1Diffs = dir1Node.getDiffs().asList();
     assertEquals(1, dir1Diffs.size());
     assertEquals(s1.getId(), dir1Diffs.get(0).getSnapshotId());
     
@@ -1306,7 +1306,8 @@ public class TestRenameWithSnapshots {
     
     INode fooNode = fsdir.getINode4Write(foo.toString());
     assertTrue(fooNode.isDirectory() && fooNode.asDirectory().isWithSnapshot());
-    List<DirectoryDiff> fooDiffs = fooNode.asDirectory().getDiffs().asList();
+    DiffList<DirectoryDiff> fooDiffs =
+        fooNode.asDirectory().getDiffs().asList();
     assertEquals(1, fooDiffs.size());
     assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
     
@@ -1364,7 +1365,7 @@ public class TestRenameWithSnapshots {
         .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir1Children.size());
     assertEquals(foo.getName(), dir1Children.get(0).getLocalName());
-    List<DirectoryDiff> dir1Diffs = dir1Node.getDiffs().asList();
+    DiffList<DirectoryDiff> dir1Diffs = dir1Node.getDiffs().asList();
     assertEquals(1, dir1Diffs.size());
     assertEquals(s1.getId(), dir1Diffs.get(0).getSnapshotId());
     
@@ -1433,7 +1434,7 @@ public class TestRenameWithSnapshots {
     ReadOnlyList<INode> dir2Children = dir2Node
         .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir2Children.size());
-    List<DirectoryDiff> dir2Diffs = dir2Node.getDiffs().asList();
+    DiffList<DirectoryDiff> dir2Diffs = dir2Node.getDiffs().asList();
     assertEquals(1, dir2Diffs.size());
     assertEquals(s2.getId(), dir2Diffs.get(0).getSnapshotId());
     ChildrenDiff childrenDiff = dir2Diffs.get(0).getChildrenDiff();
@@ -1445,7 +1446,8 @@ public class TestRenameWithSnapshots {
     INode fooNode = fsdir.getINode4Write(foo_dir2.toString());
     assertTrue(childrenDiff.getList(ListType.CREATED).get(0) == fooNode);
     assertTrue(fooNode instanceof INodeReference.DstReference);
-    List<DirectoryDiff> fooDiffs = fooNode.asDirectory().getDiffs().asList();
+    DiffList<DirectoryDiff> fooDiffs =
+        fooNode.asDirectory().getDiffs().asList();
     assertEquals(1, fooDiffs.size());
     assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
     
@@ -1594,7 +1596,7 @@ public class TestRenameWithSnapshots {
     INode barNode = fsdir2.getINode4Write(bar.toString());
     assertTrue(barNode.getClass() == INodeFile.class);
     assertSame(fooNode, barNode.getParent());
-    List<DirectoryDiff> diffList = dir1Node
+    DiffList<DirectoryDiff> diffList = dir1Node
         .getDiffs().asList();
     assertEquals(1, diffList.size());
     DirectoryDiff diff = diffList.get(0);
@@ -1668,7 +1670,7 @@ public class TestRenameWithSnapshots {
     INode fooNode = childrenList.get(0);
     assertTrue(fooNode.asDirectory().isWithSnapshot());
     assertSame(dir1Node, fooNode.getParent());
-    List<DirectoryDiff> diffList = dir1Node
+    DiffList<DirectoryDiff> diffList = dir1Node
         .getDiffs().asList();
     assertEquals(1, diffList.size());
     DirectoryDiff diff = diffList.get(0);
@@ -1728,7 +1730,7 @@ public class TestRenameWithSnapshots {
     ReadOnlyList<INode> children = fooNode
         .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, children.size());
-    List<DirectoryDiff> diffList = fooNode.getDiffs().asList();
+    DiffList<DirectoryDiff> diffList = fooNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     DirectoryDiff diff = diffList.get(0);
     // this diff is generated while renaming
@@ -1742,7 +1744,7 @@ public class TestRenameWithSnapshots {
     INodeFile barNode = fsdir.getINode4Write(bar.toString()).asFile();
     assertSame(barNode, children.get(0));
     assertSame(fooNode, barNode.getParent());
-    List<FileDiff> barDiffList = barNode.getDiffs().asList();
+    DiffList<FileDiff> barDiffList = barNode.getDiffs().asList();
     assertEquals(1, barDiffList.size());
     FileDiff barDiff = barDiffList.get(0);
     assertEquals(s1.getId(), barDiff.getSnapshotId());
@@ -1982,7 +1984,7 @@ public class TestRenameWithSnapshots {
         .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, children.size());
     assertEquals(bar.getName(), children.get(0).getLocalName());
-    List<DirectoryDiff> diffList = fooNode.getDiffs().asList();
+    DiffList<DirectoryDiff> diffList = fooNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
     assertEquals(s1.getId(), diffList.get(0).getSnapshotId());
@@ -2054,7 +2056,7 @@ public class TestRenameWithSnapshots {
     assertEquals(bar.getName(), children.get(0).getLocalName());
     assertEquals(bar2.getName(), children.get(1).getLocalName());
     assertEquals(bar3.getName(), children.get(2).getLocalName());
-    List<DirectoryDiff> diffList = fooNode.getDiffs().asList();
+    DiffList<DirectoryDiff> diffList = fooNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
     assertEquals(s1.getId(), diffList.get(0).getSnapshotId());
@@ -2231,7 +2233,7 @@ public class TestRenameWithSnapshots {
     // check dir1: foo should be in the created list of s0
     INodeDirectory dir1Node = fsdir.getINode4Write(dir1.toString())
         .asDirectory();
-    List<DirectoryDiff> dir1DiffList = dir1Node.getDiffs().asList();
+    DiffList<DirectoryDiff> dir1DiffList = dir1Node.getDiffs().asList();
     assertEquals(1, dir1DiffList.size());
     List<INode> dList = dir1DiffList.get(0).getChildrenDiff()
         .getList(ListType.DELETED);
@@ -2249,7 +2251,7 @@ public class TestRenameWithSnapshots {
         .asDirectory();
     assertSame(fooNode.asDirectory(), barNode.getParent());
     // bar should only have a snapshot diff for s0
-    List<DirectoryDiff> barDiffList = barNode.getDiffs().asList();
+    DiffList<DirectoryDiff> barDiffList = barNode.getDiffs().asList();
     assertEquals(1, barDiffList.size());
     DirectoryDiff diff = barDiffList.get(0);
     INodeDirectory testNode = fsdir.getINode4Write(test.toString())
@@ -2264,7 +2266,7 @@ public class TestRenameWithSnapshots {
     // of the snapshot diff for s2
     INodeDirectory dir2Node = fsdir.getINode4Write(dir2.toString())
         .asDirectory();
-    List<DirectoryDiff> dir2DiffList = dir2Node.getDiffs().asList();
+    DiffList<DirectoryDiff> dir2DiffList = dir2Node.getDiffs().asList();
     // dir2Node should contain 1 snapshot diffs for s2
     assertEquals(1, dir2DiffList.size());
     dList = dir2DiffList.get(0).getChildrenDiff().getList(ListType.DELETED);
@@ -2318,7 +2320,7 @@ public class TestRenameWithSnapshots {
         "foo/bar");
     INodeDirectory barNode = fsdir.getINode(barInS0.toString()).asDirectory();
     assertEquals(0, barNode.getChildrenList(Snapshot.CURRENT_STATE_ID).size());
-    List<DirectoryDiff> diffList = barNode.getDiffs().asList();
+    DiffList<DirectoryDiff> diffList = barNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     DirectoryDiff diff = diffList.get(0);
     assertEquals(0, diff.getChildrenDiff().getList(ListType.DELETED).size());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java
index c5ac26e..2fecbb1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java
@@ -149,7 +149,8 @@ public class TestSetQuotaWithSnapshot {
     hdfs.setQuota(dir, HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_RESET);
     INode subNode = fsdir.getINode4Write(subDir.toString());
     assertTrue(subNode.asDirectory().isWithSnapshot());
-    List<DirectoryDiff> diffList = subNode.asDirectory().getDiffs().asList();
+    DiffList<DirectoryDiff> diffList =
+        subNode.asDirectory().getDiffs().asList();
     assertEquals(1, diffList.size());
     Snapshot s2 = dirNode.getSnapshot(DFSUtil.string2Bytes("s2"));
     assertEquals(s2.getId(), diffList.get(0).getSnapshotId());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6ea7d78c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java
index 8c8fca7..01157e8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java
@@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
 
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
-import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -101,7 +100,7 @@ public class TestSnapshotRename {
     for (int i = 0; i < listByName.size(); i++) {
       assertEquals(sortedNames[i], listByName.get(i).getRoot().getLocalName());
     }
-    List<DirectoryDiff> listByTime = srcRoot.getDiffs().asList();
+    DiffList<DirectoryDiff> listByTime = srcRoot.getDiffs().asList();
     assertEquals(names.length, listByTime.size());
     for (int i = 0; i < listByTime.size(); i++) {
       Snapshot s = srcRoot.getDirectorySnapshottableFeature().getSnapshotById(




[04/21] hadoop git commit: HADOOP-15076. Enhance S3A troubleshooting documents and add a performance document. Contributed by Steve Loughran.

Posted by ha...@apache.org.
HADOOP-15076. Enhance S3A troubleshooting documents and add a performance document.
Contributed by Steve Loughran.

(cherry picked from commit c761e658f6594c4e519ed39ef36669de2c5cee15)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b27ab7dd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b27ab7dd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b27ab7dd

Branch: refs/heads/HDFS-12996
Commit: b27ab7dd81359df0a7594ebb98e656a41cd19250
Parents: c9a373f
Author: Steve Loughran <st...@apache.org>
Authored: Thu Feb 15 14:57:56 2018 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Feb 15 14:57:56 2018 +0000

----------------------------------------------------------------------
 .../markdown/tools/hadoop-aws/encryption.md     |  21 +-
 .../src/site/markdown/tools/hadoop-aws/index.md |  77 +-
 .../markdown/tools/hadoop-aws/performance.md    | 518 +++++++++++++
 .../tools/hadoop-aws/troubleshooting_s3a.md     | 753 ++++++++++++-------
 4 files changed, 1029 insertions(+), 340 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b27ab7dd/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md
index 719c5e5..54398d7 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md
@@ -37,6 +37,8 @@ and keys with which the file was encrypted.
 * You can use AWS bucket policies to mandate encryption rules for a bucket.
 * You can use S3A per-bucket configuration to ensure that S3A clients use encryption
 policies consistent with the mandated rules.
+* You can use S3 Default Encryption to encrypt data without needing to
+set anything in the client.
 * Changing the encryption options on the client does not change how existing
 files were encrypted, except when the files are renamed.
 * For all mechanisms other than SSE-C, clients do not need any configuration
@@ -58,9 +60,10 @@ The server-side "SSE" encryption is performed with symmetric AES256 encryption;
 S3 offers different mechanisms for actually defining the key to use.
 
 
-There are thrre key management mechanisms, which in order of simplicity of use,
+There are four key management mechanisms which, in order of simplicity of use,
+are:
 
+* S3 Default Encryption
 * SSE-S3: an AES256 key is generated in S3, and saved alongside the data.
 * SSE-KMS: an AES256 key is generated in S3, and encrypted with a secret key provided
 by Amazon's Key Management Service, a key referenced by name in the uploading client.
@@ -68,6 +71,19 @@ by Amazon's Key Management Service, a key referenced by name in the uploading cl
 to encrypt and decrypt the data.
 
 
+## <a name="sse-s3"></a> S3 Default Encryption
+
+This feature allows the administrators of the AWS account to set the "default"
+encryption policy on a bucket: the encryption to use if the client does
+not explicitly declare an encryption algorithm.
+
+[S3 Default Encryption for S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/dev/bucket-encryption.html)
+
+This supports SSE-S3 and SSE-KMS.
+
+There is no need to set anything up in the client: do it in the AWS console.
+
+
 ## <a name="sse-s3"></a> SSE-S3 Amazon S3-Managed Encryption Keys
 
 In SSE-S3, all keys and secrets are managed inside S3. This is the simplest encryption mechanism.
@@ -413,7 +429,6 @@ How can you do that from Hadoop? With `rename()`.
 
 The S3A client mimics a real filesystem's rename operation by copying all the
 source files to the destination paths, then deleting the old ones.
-If you do a rename()
 
 Note: this does not work for SSE-C, because you cannot set a different key
 for reading as for writing, and you must supply that key for reading. There
@@ -421,7 +436,7 @@ you need to copy one bucket to a different bucket, one with a different key.
 Use `distCp` for this, with per-bucket encryption policies.
 
 
-## <a name="Troubleshooting"></a> Troubleshooting Encryption
+## <a name="troubleshooting"></a> Troubleshooting Encryption
 
 The [troubleshooting](./troubleshooting_s3a.html) document covers
 stack traces which may surface when working with encrypted data.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b27ab7dd/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 0e03100..edf392d 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -25,6 +25,7 @@ Please use `s3a:` as the connector to data hosted in S3 with Apache Hadoop.**
 See also:
 
 * [Encryption](./encryption.html)
+* [Performance](./performance.html)
 * [S3Guard](./s3guard.html)
 * [Troubleshooting](./troubleshooting_s3a.html)
 * [Committing work to S3 with the "S3A Committers"](./committers.html)
@@ -1580,80 +1581,8 @@ The S3A Filesystem client supports the notion of input policies, similar
 to that of the Posix `fadvise()` API call. This tunes the behavior of the S3A
 client to optimise HTTP GET requests for the different use cases.
 
-*"sequential"*
-
-Read through the file, possibly with some short forward seeks.
-
-The whole document is requested in a single HTTP request; forward seeks
-within the readahead range are supported by skipping over the intermediate
-data.
-
-This is leads to maximum read throughput —but with very expensive
-backward seeks.
-
-
-*"normal" (default)*
-
-The "Normal" policy starts off reading a file  in "sequential" mode,
-but if the caller seeks backwards in the stream, it switches from
-sequential to "random".
-
-This policy effectively recognizes the initial read pattern of columnar
-storage formats (e.g. Apache ORC and Apache Parquet), which seek to the end
-of a file, read in index data and then seek backwards to selectively read
-columns. The first seeks may be be expensive compared to the random policy,
-however the overall process is much less expensive than either sequentially
-reading through a file with the "random" policy, or reading columnar data
-with the "sequential" policy. When the exact format/recommended
-seek policy of data are known in advance, this policy
-
-*"random"*
-
-Optimised for random IO, specifically the Hadoop `PositionedReadable`
-operations —though `seek(offset); read(byte_buffer)` also benefits.
-
-Rather than ask for the whole file, the range of the HTTP request is
-set to that that of the length of data desired in the `read` operation
-(Rounded up to the readahead value set in `setReadahead()` if necessary).
-
-By reducing the cost of closing existing HTTP requests, this is
-highly efficient for file IO accessing a binary file
-through a series of `PositionedReadable.read()` and `PositionedReadable.readFully()`
-calls. Sequential reading of a file is expensive, as now many HTTP requests must
-be made to read through the file.
-
-For operations simply reading through a file: copying, distCp, reading
-Gzipped or other compressed formats, parsing .csv files, etc, the `sequential`
-policy is appropriate. This is the default: S3A does not need to be configured.
-
-For the specific case of high-performance random access IO, the `random` policy
-may be considered. The requirements are:
-
-* Data is read using the `PositionedReadable` API.
-* Long distance (many MB) forward seeks
-* Backward seeks as likely as forward seeks.
-* Little or no use of single character `read()` calls or small `read(buffer)`
-calls.
-* Applications running close to the S3 data store. That is: in EC2 VMs in
-the same datacenter as the S3 instance.
-
-The desired fadvise policy must be set in the configuration option
-`fs.s3a.experimental.input.fadvise` when the filesystem instance is created.
-That is: it can only be set on a per-filesystem basis, not on a per-file-read
-basis.
-
-    <property>
-      <name>fs.s3a.experimental.input.fadvise</name>
-      <value>random</value>
-      <description>Policy for reading files.
-       Values: 'random', 'sequential' or 'normal'
-       </description>
-    </property>
-
-[HDFS-2744](https://issues.apache.org/jira/browse/HDFS-2744),
-*Extend FSDataInputStream to allow fadvise* proposes adding a public API
-to set fadvise policies on input streams. Once implemented,
-this will become the supported mechanism used for configuring the input IO policy.
+See [Improving data input performance through fadvise](./performance.html#fadvise)
+for the details.
 
 ##<a name="metrics"></a>Metrics
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b27ab7dd/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
new file mode 100644
index 0000000..e8f4d20
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
@@ -0,0 +1,518 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+# Maximizing Performance when working with the S3A Connector
+
+<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
+
+
+## <a name="introduction"></a> Introduction
+
+S3 is slower to work with than HDFS, even on virtual clusters running on
+Amazon EC2.
+
+That's because it's a very different system, as you can see:
+
+
+| Feature | HDFS | S3 through the S3A connector |
+|---------|------|------------------------------|
+| communication | RPC | HTTP GET/PUT/HEAD/LIST/COPY requests |
+| data locality | local storage | remote S3 servers |
+| replication | multiple datanodes | asynchronous after upload |
+| consistency | consistent data and listings | eventually consistent for listings, deletes and updates |
+| bandwidth | best: local IO, worst: datacenter network | bandwidth between servers and S3 |
+| latency | low | high, especially for "low cost" directory operations |
+| rename | fast, atomic | slow faked rename through COPY & DELETE|
+| delete | fast, atomic | fast for a file, slow & non-atomic for directories |
+| writing| incremental | in blocks; not visible until the writer is closed |
+| reading | seek() is fast | seek() is slow and expensive |
+| IOPs | limited only by hardware | callers are throttled to shards in an S3 bucket |
+| Security | Posix user+group; ACLs | AWS Roles and policies |
+
+From a performance perspective, key points to remember are:
+
+* S3 throttles bucket access across all callers: adding workers can make things worse.
+* EC2 VMs have network IO throttled based on the VM type.
+* Directory rename and copy operations take *much* longer the more objects and data there is.
+The slow performance of `rename()` surfaces during the commit phase of jobs,
+applications like `DistCP`, and elsewhere.
+* seek() calls when reading a file can force new HTTP requests.
+This can make reading columnar Parquet/ORC data expensive.
+
+Overall, although the S3A connector makes S3 look like a file system,
+it isn't, and some attempts to preserve the metaphor are "aggressively suboptimal".
+
+To make most efficient use of S3, care is needed.
+
+## <a name="s3guard"></a> Speeding up directory listing operations through S3Guard
+
+[S3Guard](s3guard.html) provides significant speedups for operations which
+list files a lot. This includes the setup of all queries against data:
+MapReduce, Hive and Spark, as well as DistCP.
+
+
+Experiment with using it to see what speedup it delivers.
+
+
+## <a name="fadvise"></a> Improving data input performance through fadvise
+
+The S3A Filesystem client supports the notion of input policies, similar
+to that of the Posix `fadvise()` API call. This tunes the behavior of the S3A
+client to optimise HTTP GET requests for the different use cases.
+
+### fadvise `sequential`
+
+Read through the file, possibly with some short forward seeks.
+
+The whole document is requested in a single HTTP request; forward seeks
+within the readahead range are supported by skipping over the intermediate
+data.
+
+This delivers maximum sequential throughput, but with very expensive
+backward seeks.
+
+Applications reading a file in bulk (DistCP, any copy operations) should use
+sequential access, as should those reading data from gzipped `.gz` files.
+Because the "normal" fadvise policy starts off in sequential IO mode,
+there is rarely any need to explicitly request this policy.
+
+### fadvise `random`
+
+Optimised for random IO, specifically the Hadoop `PositionedReadable`
+operations, though `seek(offset); read(byte_buffer)` also benefits.
+
+Rather than ask for the whole file, the range of the HTTP request is
+set to that of the length of data desired in the `read` operation
+(rounded up to the readahead value set in `setReadahead()` if necessary).
+
+By reducing the cost of closing existing HTTP requests, this is
+highly efficient for file IO accessing a binary file
+through a series of `PositionedReadable.read()` and `PositionedReadable.readFully()`
+calls. Sequential reading of a file is expensive, as now many HTTP requests must
+be made to read through the file: there's a delay between each GET operation.
+
+
+Random IO is best for IO with seek-heavy characteristics:
+
+* Data is read using the `PositionedReadable` API.
+* Long distance (many MB) forward seeks
+* Backward seeks as likely as forward seeks.
+* Little or no use of single character `read()` calls or small `read(buffer)`
+calls.
+* Applications running close to the S3 data store. That is: in EC2 VMs in
+the same datacenter as the S3 instance.
+
+The desired fadvise policy must be set in the configuration option
+`fs.s3a.experimental.input.fadvise` when the filesystem instance is created.
+That is: it can only be set on a per-filesystem basis, not on a per-file-read
+basis.
+
+```xml
+<property>
+  <name>fs.s3a.experimental.input.fadvise</name>
+  <value>random</value>
+  <description>
+  Policy for reading files.
+  Values: 'random', 'sequential' or 'normal'
+   </description>
+</property>
+```
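+
+A minimal sketch of doing the same thing programmatically; the class, bucket
+and object names below are placeholders:
+
+```java
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class RandomFadviseExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    // Choose the seek policy before the filesystem instance is created;
+    // it cannot be changed per read.
+    conf.set("fs.s3a.experimental.input.fadvise", "random");
+    try (FileSystem fs = FileSystem.newInstance(
+        new URI("s3a://example-bucket/"), conf)) {
+      // Streams opened from this instance use the random policy.
+      fs.open(new Path("s3a://example-bucket/data/file.orc")).close();
+    }
+  }
+}
+```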
+
+[HDFS-2744](https://issues.apache.org/jira/browse/HDFS-2744),
+*Extend FSDataInputStream to allow fadvise* proposes adding a public API
+to set fadvise policies on input streams. Once implemented,
+this will become the supported mechanism used for configuring the input IO policy.
+
+### fadvise `normal` (default)
+
+The `normal` policy starts off reading a file in `sequential` mode,
+but if the caller seeks backwards in the stream, it switches from
+sequential to `random`.
+
+This policy essentially recognizes the initial read pattern of columnar
+storage formats (e.g. Apache ORC and Apache Parquet), which seek to the end
+of a file, read in index data and then seek backwards to selectively read
+columns. The first seeks may be expensive compared to the random policy,
+however the overall process is much less expensive than either sequentially
+reading through a file with the `random` policy, or reading columnar data
+with the `sequential` policy.
+
+
+## <a name="commit"></a> Committing Work in MapReduce and Spark
+
+Hadoop MapReduce, Apache Hive and Apache Spark all write their work
+to HDFS and similar filesystems.
+When using S3 as a destination, this is slow because of the way `rename()`
+is mimicked with copy and delete.
+
+If committing output takes a long time, it is because you are using the standard
+`FileOutputCommitter`. If you are doing this on any S3 endpoint which lacks
+list consistency (Amazon S3 without [S3Guard](s3guard.html)), this committer
+is at risk of losing data!
+
+*Your problem may appear to be performance, but that is a symptom
+of the underlying problem: the way S3A fakes rename operations means that
+the rename cannot safely be used in output-commit algorithms.*
+
+Fix: Use one of the dedicated [S3A Committers](committers.html).
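+
+As a sketch only, the equivalent client-side configuration might look like the
+following, assuming the factory and committer names documented for the S3A
+committers:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+public class CommitterSelectionExample {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();
+    // Route committer creation for s3a:// destinations through the S3A
+    // committer factory, then pick the "directory" staging committer by name.
+    conf.set("mapreduce.outputcommitter.factory.scheme.s3a",
+        "org.apache.hadoop.fs.s3a.commit.S3ACommitterFactory");
+    conf.set("fs.s3a.committer.name", "directory");
+  }
+}
+```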
+
+## <a name="tuning"></a> Options to Tune
+
+### <a name="pooling"></a> Thread and connection pool sizes.
+
+Each S3A client interacting with a single bucket, as a single user, has its
+own dedicated pool of open HTTP 1.1 connections alongside a pool of threads used
+for upload and copy operations.
+The default pool sizes are intended to strike a balance between performance
+and memory/thread use.
+
+You can have a larger pool of (reused) HTTP connections and threads
+for parallel IO (especially uploads) by setting the properties
+
+
+| property | meaning | default |
+|----------|---------|---------|
+| `fs.s3a.threads.max`| Threads in the AWS transfer manager| 10 |
+| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 10|
+
+We recommend using larger values for processes which perform
+a lot of IO: `DistCp`, Spark Workers and similar.
+
+```xml
+<property>
+  <name>fs.s3a.threads.max</name>
+  <value>20</value>
+</property>
+<property>
+  <name>fs.s3a.connection.maximum</name>
+  <value>20</value>
+</property>
+```
+
+Be aware, however, that processes which perform many parallel queries
+may consume large amounts of resources if each query is working with
+a different set of S3 buckets, or is acting on behalf of different users.
+
+### For large data uploads, tune the block size: `fs.s3a.block.size`
+
+When uploading data, it is uploaded in blocks set by the option
+`fs.s3a.block.size`; default value "32M" for 32 Megabytes.
+
+If a larger value is used, then more data is buffered before the upload
+begins:
+
+```xml
+<property>
+  <name>fs.s3a.block.size</name>
+  <value>128M</value>
+</property>
+```
+
+This means that fewer PUT/POST requests are made of S3 to upload data,
+which reduces the likelihood that S3 will throttle the client(s).
+
+### Maybe: Buffer Write Data in Memory
+
+When large files are being uploaded, blocks are saved to disk and then
+queued for uploading, with multiple threads uploading different blocks
+in parallel.
+
+The blocks can be buffered in memory by setting the option
+`fs.s3a.fast.upload.buffer` to `bytebuffer`, or, for on-heap storage,
+`array`.
+
+1. Switching to in-memory IO reduces disk IO, and can be faster if the bandwidth
+to the S3 store is so high that the disk IO becomes the bottleneck.
+This can have a tangible benefit when working with on-premise S3-compatible
+object stores with very high bandwidth to servers.
+
+It is very easy to run out of memory when buffering to it; the option
+`fs.s3a.fast.upload.active.blocks` exists to tune how many active blocks
+a single output stream writing to S3 may have queued at a time.
+
+As the size of each buffered block is determined by the value of `fs.s3a.block.size`,
+the larger the block size, the more likely you will run out of memory.
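+
+A sketch of the sizing arithmetic; the values are illustrative, not
+recommendations:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+public class UploadBufferSizingExample {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();
+    conf.set("fs.s3a.fast.upload.buffer", "bytebuffer"); // or "array" for on-heap
+    conf.set("fs.s3a.block.size", "32M");
+    conf.set("fs.s3a.fast.upload.active.blocks", "4");
+
+    // Worst case per output stream: every active block buffered at once.
+    long blockBytes = 32L * 1024 * 1024;
+    int activeBlocks = 4;
+    long perStream = blockBytes * activeBlocks;   // 128 MB per stream
+    int concurrentStreams = 8;                    // e.g. 8 tasks writing at once
+    System.out.println("worst-case buffer memory ~ "
+        + (perStream * concurrentStreams) / (1024 * 1024) + " MB");
+  }
+}
+```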
+
+## <a name="distcp"></a> DistCP
+
+DistCP can be slow, especially if the parameters and options for the operation
+are not tuned for working with S3.
+
+To exacerbate the issue, DistCP invariably puts heavy load against the
+bucket being worked with, which will cause S3 to throttle requests.
+It will throttle: directory operations, uploads of new data, and delete operations,
+amongst other things.
+
+### DistCP: Options to Tune
+
+* `-numListstatusThreads <threads>` : set to something higher than the default (1).
+* `-bandwidth <mb>` : use to limit the upload bandwidth per worker
+* `-m <maps>` : limit the number of mappers, hence the load on the S3 bucket.
+
+Adding more maps with the `-m` option does not guarantee better performance;
+it may just increase the amount of throttling which takes place.
+A smaller number of maps with a higher bandwidth per map can be more efficient.
+
+### DistCP: Options to Avoid.
+
+DistCp's `-atomic` option copies up data into a directory, then renames
+it into place, which is where the copy takes place. This is a performance
+killer.
+
+* Do not use the `-atomic` option.
+* The `-append` operation is not supported on S3; avoid.
+* `-p`: S3 does not have a POSIX-style permission model; this will fail.
+
+
+### DistCP: Parameters to Tune
+
+1. As discussed [earlier](#pooling), use large values for
+`fs.s3a.threads.max` and `fs.s3a.connection.maximum`.
+
+1. Make sure that the bucket is using `sequential` or `normal` fadvise seek policies,
+that is, `fs.s3a.experimental.input.fadvise` is not set to `random`.
+
+1. Perform listings in parallel by setting `-numListstatusThreads`
+to a higher number. Make sure that `fs.s3a.connection.maximum`
+is equal to or greater than the value used.
+
+1. If using `-delete`, set `fs.trash.interval` to 0 to prevent the deleted
+objects from being copied to a trash directory.
+
+*DO NOT* switch `fs.s3a.fast.upload.buffer` to buffer in memory.
+If one distcp mapper runs out of memory it will fail,
+and that runs the risk of failing the entire job.
+It is safer to keep the default value, `disk`.
+
+What is potentially useful is uploading in bigger blocks; this is more
+efficient in terms of HTTP connection use, and reduces the IOP rate against
+the S3 bucket/shard.
+
+```xml
+<property>
+  <name>fs.s3a.threads.max</name>
+  <value>20</value>
+</property>
+
+<property>
+  <name>fs.s3a.connection.maximum</name>
+  <value>30</value>
+  <description>
+   Make greater than both fs.s3a.threads.max and -numListstatusThreads
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.experimental.input.fadvise</name>
+  <value>normal</value>
+</property>
+
+<property>
+  <name>fs.s3a.block.size</name>
+  <value>128M</value>
+</property>
+
+<property>
+  <name>fs.s3a.fast.upload.buffer</name>
+  <value>disk</value>
+</property>
+
+<property>
+  <name>fs.trash.interval</name>
+  <value>0</value>
+</property>
+```
+
+## <a name="rm"></a> hadoop shell commands `fs -rm`
+
+The `hadoop fs -rm` command can rename the file under `.Trash` rather than
+deleting it. Use `-skipTrash` to eliminate that step.
+
+
+This can be set in the property `fs.trash.interval`; while the default is 0,
+most HDFS deployments have it set to a non-zero value to reduce the risk of
+data loss.
+
+```xml
+<property>
+  <name>fs.trash.interval</name>
+  <value>0</value>
+</property>
+```
+
+
+## <a name="load balancing"></a> Improving S3 load-balancing behavior
+
+Amazon S3 uses a set of front-end servers to provide access to the underlying data.
+The choice of which front-end server to use is handled via load-balancing DNS
+service: when the IP address of an S3 bucket is looked up, the choice of which
+IP address to return to the client is made based on the current load
+of the front-end servers.
+
+Over time, the load across the front-end changes, so those servers considered
+"lightly loaded" will change. If the DNS value is cached for any length of time,
+your application may end up talking to an overloaded server. Or, in the case
+of failures, trying to talk to a server that is no longer there.
+
+And by default, for historical security reasons in the era of applets,
+the DNS TTL of a JVM is "infinity".
+
+To work with AWS better, set the DNS time-to-live of an application which
+works with S3 to something lower.
+See [AWS documentation](http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/java-dg-jvm-ttl.html).
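+
+A sketch of setting the TTL from Java before any lookups happen; 60 seconds is
+just an example value:
+
+```java
+import java.security.Security;
+
+public class DnsTtlExample {
+  public static void main(String[] args) {
+    // Override the JVM-wide positive DNS cache TTL (in seconds).
+    // This must run before the first name lookup in the process.
+    Security.setProperty("networkaddress.cache.ttl", "60");
+  }
+}
+```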
+
+## <a name="network_performance"></a> Troubleshooting network performance
+
+An example of this is covered in [HADOOP-13871](https://issues.apache.org/jira/browse/HADOOP-13871).
+
+1. For public data, use `curl`:
+
+        curl -O https://landsat-pds.s3.amazonaws.com/scene_list.gz
+1. Use `nettop` to monitor a process's connections.
+
+
+## <a name="throttling"></a> Throttling
+
+When many requests are made of a specific S3 bucket (or shard inside it),
+S3 will respond with a 503 "throttled" response.
+Throttling can be recovered from, provided overall load decreases.
+Furthermore, because a throttled request is rejected before any changes are
+made to the object store, it is inherently idempotent. For this reason, the
+client will always attempt to retry throttled requests.
+
+The limit of the number of times a throttled request can be retried,
+and the exponential interval increase between attempts, can be configured
+independently of the other retry limits.
+
+```xml
+<property>
+  <name>fs.s3a.retry.throttle.limit</name>
+  <value>20</value>
+  <description>
+    Number of times to retry any throttled request.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.retry.throttle.interval</name>
+  <value>500ms</value>
+  <description>
+    Interval between retry attempts on throttled requests.
+  </description>
+</property>
+```
+
+If a client is failing due to `AWSServiceThrottledException` failures,
+increasing the interval and limit *may* address this. However, it
+is a sign of AWS services being overloaded by the sheer number of clients
+and rate of requests. Spreading data across different buckets, and/or using
+a more balanced directory structure may be beneficial.
+Consult [the AWS documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).
+
+Reading or writing data encrypted with SSE-KMS forces S3 to make calls of
+the AWS KMS Key Management Service, which comes with its own
+[Request Rate Limits](http://docs.aws.amazon.com/kms/latest/developerguide/limits.html).
+These default to 1200/second for an account, across all keys and all uses of
+them, which, for S3 means: across all buckets with data encrypted with SSE-KMS.
+
+### <a name="minimizing_throttling"></a> Tips to Keep Throttling down
+
+If you are seeing a lot of throttling responses on a large scale
+operation like a `distcp` copy, *reduce* the number of processes trying
+to work with the bucket (for distcp: reduce the number of mappers with the
+`-m` option).
+
+If you are reading or writing lists of files, and you can randomize
+the list so they are not processed in a simple sorted order, you may
+reduce load on a specific shard of S3 data and so potentially increase throughput.
+
+An S3 Bucket is throttled by requests coming from all
+simultaneous clients. Different applications and jobs may interfere with
+each other: consider that when troubleshooting.
+Partitioning data into different buckets may help isolate load here.
+
+If you are using data encrypted with SSE-KMS, then the KMS request rate limits
+will also apply: these are stricter than the S3 numbers.
+If you believe that you are reaching these limits, you may be able to
+get them increased.
+Consult [the KMS Rate Limit documentation](http://docs.aws.amazon.com/kms/latest/developerguide/limits.html).
+
+### <a name="s3guard_throttling"></a> S3Guard and Throttling
+
+
+S3Guard uses DynamoDB for directory and file lookups;
+it is rate limited to the amount of (guaranteed) IO purchased for a
+table.
+
+The `hadoop s3guard bucket-info s3a://bucket` command will print out the
+allocated capacity of the table associated with a bucket.
+
+
+If a significant rate of throttling events is observed here, the pre-allocated
+IOPs can be increased with the `hadoop s3guard set-capacity` command, or
+through the AWS Console. Throttling events in S3Guard are noted in logs, and
+also in the S3A metrics `s3guard_metadatastore_throttle_rate` and
+`s3guard_metadatastore_throttled`.
+
+If you are using DistCP for a large backup to/from an S3Guarded bucket, it is
+actually possible to increase the capacity for the duration of the operation.
+
+
+## <a name="coding"></a> Best Practises for Code
+
+Here are some best practices if you are writing applications to work with
+S3 or any other object store through the Hadoop APIs.
+
+Use `listFiles(path, recursive)` over `listStatus(path)`.
+The recursive `listFiles()` call can enumerate all dependents of a path
+in a single LIST call, irrespective of how deep the path is.
+In contrast, any directory tree-walk implemented in the client is issuing
+multiple HTTP requests to scan each directory, all the way down.
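+
+A sketch, with a placeholder bucket and path:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
+public class DeepListingExample {
+  public static void main(String[] args) throws Exception {
+    Path root = new Path("s3a://example-bucket/datasets/");
+    FileSystem fs = root.getFileSystem(new Configuration());
+    // One deep listing, paged as the iterator is consumed, rather than a
+    // client-side tree walk built from per-directory listStatus() calls.
+    RemoteIterator<LocatedFileStatus> files = fs.listFiles(root, true);
+    long totalBytes = 0;
+    while (files.hasNext()) {
+      totalBytes += files.next().getLen();
+    }
+    System.out.println("bytes under " + root + ": " + totalBytes);
+  }
+}
+```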
+
+Cache the outcome of `getFileStatus()`, rather than repeatedly asking for it.
+That includes using `isFile()` and `isDirectory()`, which are simply wrappers
+around `getFileStatus()`.
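+
+A sketch of probing once and reusing the result:
+
+```java
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class StatusCachingExample {
+  static void describe(FileSystem fs, Path path) throws IOException {
+    // One getFileStatus() call instead of separate isFile()/isDirectory()
+    // probes, each of which repeats the same HTTP requests.
+    FileStatus status = fs.getFileStatus(path);
+    if (status.isDirectory()) {
+      System.out.println(path + " is a directory");
+    } else {
+      System.out.println(path + " is a file of " + status.getLen() + " bytes");
+    }
+  }
+}
+```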
+
+Don't immediately look for a file with a `getFileStatus()` or listing call
+after creating it, or try to read it immediately.
+This is where eventual consistency problems surface: the data may not yet be visible.
+
+Rely on `FileNotFoundException` being raised if the source of an operation is
+missing, rather than implementing your own probe for the file before
+conditionally calling the operation.
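+
+A sketch of that pattern:
+
+```java
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class OpenWithoutProbeExample {
+  static void process(FileSystem fs, Path source) throws IOException {
+    // No exists()/getFileStatus() probe first: just open and handle the miss.
+    try (FSDataInputStream in = fs.open(source)) {
+      in.read();                       // ... process the stream ...
+    } catch (FileNotFoundException e) {
+      System.err.println("source is missing: " + source);
+    }
+  }
+}
+```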
+
+### `rename()`
+
+Avoid any algorithm which uploads data into a temporary file and then uses
+`rename()` to commit it into place with a final path.
+On HDFS this offers a fast commit operation.
+With S3, Wasb and other object stores, you can write straight to the destination,
+knowing that the file isn't visible until you close the write: the write itself
+is atomic.
+
+The `rename()` operation may return `false` if the source is missing; this
+is a weakness in the API. Consider a check before calling rename, and if/when
+a new rename() call is made public, switch to it.
+
+
+### `delete(path, recursive)`
+
+Keep in mind that `delete(path, recursive)` is a no-op if the path does not exist, so
+there's no need to have a check for the path existing before you call it.
+
+`delete()` is often used as a cleanup operation.
+With an object store this is slow, and may cause problems if the caller
+expects an immediate response. For example, a thread may block so long
+that other liveness checks start to fail.
+Consider spawning off an executor thread to do these background cleanup operations.
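+
+A sketch of handing cleanup off to an executor; the path and pool size are
+illustrative:
+
+```java
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class BackgroundCleanupExample {
+  static Future<Boolean> cleanupAsync(FileSystem fs, Path tempDir) {
+    ExecutorService pool = Executors.newSingleThreadExecutor();
+    // Recursive delete runs off the caller's thread; callers can check the
+    // Future later (or ignore it) instead of blocking on the object store.
+    Callable<Boolean> task = () -> fs.delete(tempDir, true);
+    Future<Boolean> result = pool.submit(task);
+    pool.shutdown();                   // no new tasks; existing one still runs
+    return result;
+  }
+}
+```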

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b27ab7dd/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
index 1f3382c..97f9642 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
@@ -14,9 +14,9 @@
 
 # Troubleshooting
 
-<!-- MACRO{toc|fromDepth=0|toDepth=5} -->
+<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
 
-##<a name="introduction"></a> Introduction
+## <a name="introduction"></a> Introduction
 
 Common problems working with S3 are
 
@@ -24,28 +24,42 @@ Common problems working with S3 are
 1. Authentication
 1. S3 Inconsistency side-effects
 
-Classpath is usually the first problem. For the S3x filesystem clients,
-you need the Hadoop-specific filesystem clients, third party S3 client libraries
-compatible with the Hadoop code, and any dependent libraries compatible with
+
+Troubleshooting IAM Assumed Roles is covered in its
+[specific documentation](assumed_roles.html#troubleshooting).
+
+## <a name="classpath"></a> Classpath Setup
+
+Classpath is usually the first problem. For the S3A filesystem client,
+you need the Hadoop-specific filesystem clients, the very same AWS SDK library
+which Hadoop was built against, and any dependent libraries compatible with
 Hadoop and the specific JVM.
 
 The classpath must be set up for the process talking to S3: if this is code
 running in the Hadoop cluster, the JARs must be on that classpath. That
 includes `distcp` and the `hadoop fs` command.
 
-<!-- MACRO{toc|fromDepth=0|toDepth=2} -->
+<b>Critical:</b> *Do not attempt to "drop in" a newer version of the AWS
+SDK than that which the Hadoop version was built with.*
+Whatever problem you have, changing the AWS SDK version will not fix things,
+only change the stack traces you see.
 
-Troubleshooting IAM Assumed Roles is covered in its
-[specific documentation](assumed_roles.html#troubeshooting).
+Similarly, don't try and mix a `hadoop-aws` JAR from one Hadoop release
+with that of any other. The JAR must be in sync with `hadoop-common` and
+some other Hadoop JARs.
 
-## <a name="classpath"></a> Classpath Setup
+<i>Randomly changing hadoop- and aws- JARs in the hope of making a problem
+"go away" or to gain access to a feature you want,
+will not lead to the outcome you desire.</i>
+
+Tip: you can use [mvnrepository](http://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws)
+to determine the dependency version requirements of a specific `hadoop-aws`
+JAR published by the ASF.
 
-Note that for security reasons, the S3A client does not provide much detail
-on the authentication process (i.e. the secrets used to authenticate).
 
 ### `ClassNotFoundException: org.apache.hadoop.fs.s3a.S3AFileSystem`
 
-These is Hadoop filesytem client classes, found in the `hadoop-aws` JAR.
+These are Hadoop filesystem client classes, found in the `hadoop-aws` JAR.
 An exception reporting this class as missing means that this JAR is not on
 the classpath.
 
@@ -56,7 +70,7 @@ the classpath.
 This means that the `aws-java-sdk-bundle.jar` JAR is not on the classpath:
 add it.
 
-### Missing method in `com.amazonaws` class
+### `java.lang.NoSuchMethodError` referencing a `com.amazonaws` class
 
 This can be triggered by incompatibilities between the AWS SDK on the classpath
 and the version which Hadoop was compiled with.
@@ -68,6 +82,15 @@ version.
 The sole fix is to use the same version of the AWS SDK with which Hadoop
 was built.
 
+This can also be caused by having more than one version of an AWS SDK
+JAR on the classpath. If the full `aws-java-sdk-bundle` JAR is on the
+classpath, do not add any of the `aws-sdk-` JARs.
+
+
+### `java.lang.NoSuchMethodError` referencing an `org.apache.hadoop` class
+
+This happens if the `hadoop-aws` and `hadoop-common` JARs are out of sync.
+You can't mix them around: they have to have exactly matching version numbers.
 
 ## <a name="authentication"></a> Authentication Failure
 
@@ -115,7 +138,7 @@ mechanism.
 1. If using session authentication, the session may have expired.
 Generate a new session token and secret.
 
-1. If using environement variable-based authentication, make sure that the
+1. If using environment variable-based authentication, make sure that the
 relevant variables are set in the environment in which the process is running.
 
 The standard first step is: try to use the AWS command line tools with the same
@@ -126,7 +149,6 @@ credentials, through a command such as:
 Note the trailing "/" here; without that the shell thinks you are trying to list
 your home directory under the bucket, which will only exist if explicitly created.
 
-
 Attempting to list a bucket using inline credentials is a
 means of verifying that the key and secret can access a bucket;
 
@@ -186,7 +208,9 @@ Requests using the V2 API will be rejected with 400 `Bad Request`
 $ bin/hadoop fs -ls s3a://frankfurt/
 WARN s3a.S3AFileSystem: Client: Amazon S3 error 400: 400 Bad Request; Bad Request (retryable)
 
-com.amazonaws.services.s3.model.AmazonS3Exception: Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: 923C5D9E75E44C06), S3 Extended Request ID: HDwje6k+ANEeDsM6aJ8+D5gUmNAMguOk2BvZ8PH3g9z0gpH+IuwT7N19oQOnIr5CIx7Vqb/uThE=
+com.amazonaws.services.s3.model.AmazonS3Exception: Bad Request (Service: Amazon S3;
+ Status Code: 400; Error Code: 400 Bad Request; Request ID: 923C5D9E75E44C06),
+  S3 Extended Request ID: HDwje6k+ANEeDsM6aJ8+D5gUmNAMguOk2BvZ8PH3g9z0gpH+IuwT7N19oQOnIr5CIx7Vqb/uThE=
     at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
     at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
     at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
@@ -231,13 +255,129 @@ As an example, the endpoint for S3 Frankfurt is `s3.eu-central-1.amazonaws.com`:
 </property>
 ```
 
+## <a name="access_denied"></a> `AccessDeniedException` "Access Denied"
+
+### <a name="access_denied_unknown-ID"></a> AccessDeniedException "The AWS Access Key Id you provided does not exist in our records."
+
+The value of `fs.s3a.access.key` does not match a known access key ID.
+It may be mistyped, or the access key may have been deleted by one of the account managers.
+
+```
+java.nio.file.AccessDeniedException: bucket: doesBucketExist on bucket:
+    com.amazonaws.services.s3.model.AmazonS3Exception:
+    The AWS Access Key Id you provided does not exist in our records.
+     (Service: Amazon S3; Status Code: 403; Error Code: InvalidAccessKeyId;
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:214)
+  at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:111)
+  at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$3(Invoker.java:260)
+  at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:314)
+  at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:256)
+  at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:231)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:366)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:302)
+  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3354)
+  at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
+  at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3403)
+  at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3371)
+  at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:477)
+  at org.apache.hadoop.fs.contract.AbstractBondedFSContract.init(AbstractBondedFSContract.java:72)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.setup(AbstractFSContractTestBase.java:177)
+  at org.apache.hadoop.fs.s3a.commit.AbstractCommitITest.setup(AbstractCommitITest.java:163)
+  at org.apache.hadoop.fs.s3a.commit.AbstractITCommitMRJob.setup(AbstractITCommitMRJob.java:129)
+  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
+  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+  at java.lang.reflect.Method.invoke(Method.java:498)
+  at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
+  at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
+  at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
+  at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
+  at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
+  at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
+  at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
+  at org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
+Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
+               The AWS Access Key Id you provided does not exist in our records.
+                (Service: Amazon S3; Status Code: 403; Error Code: InvalidAccessKeyId;
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1638)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1303)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1055)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:743)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:717)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
+  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4229)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4176)
+  at com.amazonaws.services.s3.AmazonS3Client.getAcl(AmazonS3Client.java:3381)
+  at com.amazonaws.services.s3.AmazonS3Client.getBucketAcl(AmazonS3Client.java:1160)
+  at com.amazonaws.services.s3.AmazonS3Client.getBucketAcl(AmazonS3Client.java:1150)
+  at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:1266)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$verifyBucketExists$1(S3AFileSystem.java:367)
+  at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109)
+  ... 27 more
+
+```
+
+###  <a name="access_denied_disabled"></a> `AccessDeniedException` All access to this object has been disabled
 
-### "403 Access denied" when trying to write data
+Caller has no permission to access the bucket at all.
+
+```
+doesBucketExist on fdsd: java.nio.file.AccessDeniedException: fdsd: doesBucketExist on fdsd:
+ com.amazonaws.services.s3.model.AmazonS3Exception: All access to this object has been disabled
+ (Service: Amazon S3; Status Code: 403; Error Code: AllAccessDisabled; Request ID: E6229D7F8134E64F;
+  S3 Extended Request ID: 6SzVz2t4qa8J2Wxo/oc8yBuB13Mgrn9uMKnxVY0hsBd2kU/YdHzW1IaujpJdDXRDCQRX3f1RYn0=),
+  S3 Extended Request ID: 6SzVz2t4qa8J2Wxo/oc8yBuB13Mgrn9uMKnxVY0hsBd2kU/YdHzW1IaujpJdDXRDCQRX3f1RYn0=:AllAccessDisabled
+ All access to this object has been disabled (Service: Amazon S3; Status Code: 403;
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:205)
+  at org.apache.hadoop.fs.s3a.S3ALambda.once(S3ALambda.java:122)
+  at org.apache.hadoop.fs.s3a.S3ALambda.lambda$retry$2(S3ALambda.java:233)
+  at org.apache.hadoop.fs.s3a.S3ALambda.retryUntranslated(S3ALambda.java:288)
+  at org.apache.hadoop.fs.s3a.S3ALambda.retry(S3ALambda.java:228)
+  at org.apache.hadoop.fs.s3a.S3ALambda.retry(S3ALambda.java:203)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:357)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:293)
+  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3288)
+  at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:123)
+  at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3337)
+  at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3311)
+  at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:529)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo.run(S3GuardTool.java:997)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:309)
+  at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:1218)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.main(S3GuardTool.java:1227)
+Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: All access to this object has been disabled
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1638)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1303)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1055)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:743)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:717)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
+  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4229)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4176)
+  at com.amazonaws.services.s3.AmazonS3Client.getAcl(AmazonS3Client.java:3381)
+  at com.amazonaws.services.s3.AmazonS3Client.getBucketAcl(AmazonS3Client.java:1160)
+  at com.amazonaws.services.s3.AmazonS3Client.getBucketAcl(AmazonS3Client.java:1150)
+  at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:1266)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$verifyBucketExists$1(S3AFileSystem.java:360)
+  at org.apache.hadoop.fs.s3a.S3ALambda.once(S3ALambda.java:120)
+```
+
+Check the name of the bucket is correct, and validate permissions for the active user/role.
+
+### <a name="access_denied_writing"></a> `AccessDeniedException` "Access denied" when trying to manipulate data
 
 Data can be read, but attempts to write data or manipulate the store fail with
 403/Access denied.
 
 The bucket may have an access policy which the request does not comply with.
+Or the caller may not have the right to access the data.
 
 ```
 java.nio.file.AccessDeniedException: test/: PUT 0-byte object  on test/:
@@ -257,14 +397,31 @@ java.nio.file.AccessDeniedException: test/: PUT 0-byte object  on test/:
 ```
 
 In the AWS S3 management console, select the "permissions" tab for the bucket, then "bucket policy".
-If there is no bucket policy, then the error cannot be caused by one.
 
 If there is a bucket access policy, e.g. required encryption headers,
 then the settings of the s3a client must guarantee the relevant headers are set
 (e.g. the encryption options match).
 Note: S3 Default Encryption options are not considered here:
 if the bucket policy requires AES256 as the encryption policy on PUT requests,
-then the encryption option must be set in the s3a client so that the header is set.
+then the encryption option must be set in the hadoop client so that the header is set.
+
+
+Otherwise, the problem will likely be that the user does not have full access to the
+operation. Check what they were trying to do (read vs write) and then look
+at the permissions of the user/role.
+
+If the client is using [assumed roles](assumed_roles.html), and a policy
+is set in `fs.s3a.assumed.role.policy`, then that policy declares
+_all_ the rights which the caller has.
+
+
+### <a name="kms_access_denied"></a>  `AccessDeniedException` when using SSE-KMS
+
+When trying to write or read SSE-KMS-encrypted data, the client gets a
+`java.nio.file.AccessDeniedException` with the error 403/Forbidden.
+
+The caller does not have the permissions to access
+the key with which the data was encrypted.
 
 ## <a name="connectivity"></a> Connectivity Problems
 
@@ -283,14 +440,14 @@ org.apache.hadoop.fs.s3a.AWSS3IOException: Received permanent redirect response
   addressed using the specified endpoint. Please send all future requests to
   this endpoint. (Service: Amazon S3; Status Code: 301;
   Error Code: PermanentRedirect; Request ID: 7D39EC1021C61B11)
-        at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:132)
-        at org.apache.hadoop.fs.s3a.S3AFileSystem.initMultipartUploads(S3AFileSystem.java:287)
-        at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:203)
-        at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2895)
-        at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:102)
-        at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2932)
-        at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2914)
-        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:390)
+      at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:132)
+      at org.apache.hadoop.fs.s3a.S3AFileSystem.initMultipartUploads(S3AFileSystem.java:287)
+      at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:203)
+      at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2895)
+      at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:102)
+      at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2932)
+      at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2914)
+      at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:390)
 ```
 
 1. Use the [Specific endpoint of the bucket's S3 service](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region)
@@ -308,12 +465,15 @@ Using the explicit endpoint for the region is recommended for speed and
 to use the V4 signing API.
 
 
-### <a name="timeout"></a> "Timeout waiting for connection from pool" when writing data
+### <a name="timeout_from_pool"></a> "Timeout waiting for connection from pool" when writing data
 
 This happens when the output stream thread pool runs out of capacity.
 
 ```
-[s3a-transfer-shared-pool1-t20] INFO  http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - Unable to execute HTTP request: Timeout waiting for connection from poolorg.apache.http.conn.ConnectionPoolTimeoutException: Timeout waiting for connection from pool
+[s3a-transfer-shared-pool1-t20] INFO  http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496))
+ - Unable to execute HTTP request:
+  Timeout waiting for connection from poolorg.apache.http.conn.ConnectionPoolTimeoutException:
+   Timeout waiting for connection from pool
   at org.apache.http.impl.conn.PoolingClientConnectionManager.leaseConnection(PoolingClientConnectionManager.java:230)
   at org.apache.http.impl.conn.PoolingClientConnectionManager$1.getConnection(PoolingClientConnectionManager.java:199)
   at sun.reflect.GeneratedMethodAccessor13.invoke(Unknown Source)
@@ -364,6 +524,46 @@ the maximum number of allocated HTTP connections.
 Set `fs.s3a.connection.maximum` to a larger value (and at least as large as
 `fs.s3a.threads.max`)
 
+
+### `NoHttpResponseException`
+
+The HTTP Server did not respond.
+
+```
+2017-02-07 10:01:07,950 INFO [s3a-transfer-shared-pool1-t7] com.amazonaws.http.AmazonHttpClient:
+  Unable to execute HTTP request: bucket.s3.amazonaws.com:443 failed to respond
+org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond
+  at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
+  at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
+  at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
+  at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283)
+  at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259)
+  at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209)
+  at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
+  at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66)
+  at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
+  at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686)
+  at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488)
+  at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884)
+  at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
+  at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
+  at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728)
+  at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
+  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
+  at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731)
+  at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41)
+  at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28)
+  at org.apache.hadoop.fs.s3a.SemaphoredDelegatingExecutor$CallableWithPermitRelease.call(SemaphoredDelegatingExecutor.java:222)
+  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
+  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
+  at java.lang.Thread.run(Thread.java:745)
+```
+
+This is probably a network problem, unless it really is an outage of S3.
+
+
 ### Out of heap memory when writing via Fast Upload
 
 This can happen when using the upload buffering mechanism
@@ -418,7 +618,8 @@ for up to date advice.
 org.apache.hadoop.fs.s3a.AWSClientIOException: getFileStatus on test/testname/streaming/:
   com.amazonaws.AmazonClientException: Failed to sanitize XML document
   destined for handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler:
-  Failed to sanitize XML document destined for handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler
+  Failed to sanitize XML document destined for handler class
+   com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler
     at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:105)
     at org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:1462)
     at org.apache.hadoop.fs.s3a.S3AFileSystem.innerListStatus(S3AFileSystem.java:1227)
@@ -444,19 +645,136 @@ Again, we believe this is caused by the connection to S3 being broken.
 It may go away if the operation is retried.
 
 
+## <a name="other"></a> Other Errors
+
+### <a name="integrity"></a> `SdkClientException` Unable to verify integrity of data upload
 
-## Miscellaneous Errors
+Something has happened to the data as it was uploaded.
+
+```
+Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0:
+    com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
+    Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
+    didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
+    You may need to delete the data stored in Amazon S3.
+    (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
+    bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0):
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144)
+  at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121)
+  at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
+  at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
+  at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99)
+  at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190)
+  ... 22 more
+Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
+  Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
+  didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
+  You may need to delete the data stored in Amazon S3.
+  (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
+  bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0)
+  at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492)
+  at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
+  at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
+  at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
+  at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
+  ... 4 more
+```
+
+As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was
+PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared
+with the local checksum. If there is a mismatch, this error is reported.
+
+The uploaded data is already on S3 and will stay there, though if this happens
+during a multipart upload, it may not be visible (but still billed: clean up your
+multipart uploads via the `hadoop s3guard uploads` command).
+
+Possible causes for this include:
+
+1. A (possibly transient) network problem, including hardware faults.
+1. A proxy server is doing bad things to the data.
+1. Some signing problem, especially with third-party S3-compatible object stores.
+
+This is a very, very rare occurrence.
+
+If the problem is a signing one, try changing the signature algorithm.
+
+```xml
+<property>
+  <name>fs.s3a.signing-algorithm</name>
+  <value>S3SignerType</value>
+</property>
+```
+
+We cannot make any promises that it will work,
+only that it has been known to make the problem go away "once".
+
+### `AWSS3IOException` The Content-MD5 you specified did not match what we received
+
+Reads work, but writes, even `mkdir`, fail:
+
+```
+org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt)
+    on file:/tmp/hello.txt:
+    The Content-MD5 you specified did not match what we received.
+    (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
+    S3 Extended Request ID: null
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127)
+	at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69)
+	at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494)
+	at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466)
+	at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63)
+	at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353)
+	at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350)
+	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
+	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
+	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
+	at java.lang.Thread.run(Thread.java:748)
+Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
+    The Content-MD5 you specified did not match what we received.
+    (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
+    S3 Extended Request ID: null
+  at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307)
+	at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894)
+	at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597)
+	at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363)
+	at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329)
+	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308)
+	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
+	at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422)
+	at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
+	at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
+	at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
+	at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
+	at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
+	... 4 more
+```
+
+This stack trace was seen when interacting with a third-party S3 store whose
+expectations of the headers related to the AWS V4 signing mechanism were not
+compatible with those of the specific AWS SDK version Hadoop was using.
+
+Workaround: revert to V2 signing.
+
+```xml
+<property>
+  <name>fs.s3a.signing-algorithm</name>
+  <value>S3SignerType</value>
+</property>
+```
 
 ### When writing data: "java.io.FileNotFoundException: Completing multi-part upload"
 
 
 A multipart upload was trying to complete, but failed as there was no upload
 with that ID.
+
 ```
 java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b:
  com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist.
-  The upload ID may be invalid, or the upload may have been aborted or completed. (Service: Amazon S3; Status Code: 404;
-   Error Code: NoSuchUpload;
+  The upload ID may be invalid, or the upload may have been aborted or completed.
+   (Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload;
   at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
   at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
   at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
@@ -482,14 +800,11 @@ for all open writes to complete the write,
 ### Application hangs after reading a number of files
 
 
-
-
-The pool of https client connectons and/or IO threads have been used up,
+The pool of https client connections and/or IO threads have been used up,
 and none are being freed.
 
 
-1. The pools aren't big enough. Increas `fs.s3a.connection.maximum` for
-the http connections, and `fs.s3a.threads.max` for the thread pool.
+1. The pools aren't big enough. See ["Timeout waiting for connection from pool"](#timeout_from_pool);
+a configuration sketch follows this list.
 2. Likely root cause: whatever code is reading files isn't calling `close()`
 on the input streams. Make sure your code does this!
 And if it's someone else's: make sure you have a recent version; search their
@@ -497,81 +812,13 @@ issue trackers to see if its a known/fixed problem.
 If not, it's time to work with the developers, or come up with a workaround
 (i.e closing the input stream yourself).
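+
+If the pools do need enlarging (cause 1 above), here is a minimal sketch of the
+two settings involved; the values are purely illustrative, not recommendations:
+
+```xml
+<property>
+  <name>fs.s3a.connection.maximum</name>
+  <!-- Maximum number of simultaneous HTTP connections to S3. -->
+  <value>30</value>
+</property>
+
+<property>
+  <name>fs.s3a.threads.max</name>
+  <!-- Maximum size of the thread pool used for uploads and other queued work. -->
+  <value>20</value>
+</property>
+```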
 
-### "Timeout waiting for connection from pool"
 
-This the same problem as above, exhibiting itself as the http connection
-pool determining that it has run out of capacity.
-
-```
-
-java.io.InterruptedIOException: getFileStatus on s3a://example/fork-0007/test:
- com.amazonaws.SdkClientException: Unable to execute HTTP request: Timeout waiting for connection from pool
-  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:145)
-  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:119)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:2040)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.checkPathForDirectory(S3AFileSystem.java:1857)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.innerMkdirs(S3AFileSystem.java:1890)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.mkdirs(S3AFileSystem.java:1826)
-  at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2230)
-  ...
-Caused by: com.amazonaws.SdkClientException: Unable to execute HTTP request: Timeout waiting for connection from pool
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleRetryableException(AmazonHttpClient.java:1069)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1035)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
-  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
-  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
-  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4168)
-  at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1249)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:1162)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:2022)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.checkPathForDirectory(S3AFileSystem.java:1857)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.innerMkdirs(S3AFileSystem.java:1890)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.mkdirs(S3AFileSystem.java:1826)
-  at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2230)
-...
-Caused by: com.amazonaws.thirdparty.apache.http.conn.ConnectionPoolTimeoutException: Timeout waiting for connection from pool
-  at com.amazonaws.thirdparty.apache.http.impl.conn.PoolingHttpClientConnectionManager.leaseConnection(PoolingHttpClientConnectionManager.java:286)
-  at com.amazonaws.thirdparty.apache.http.impl.conn.PoolingHttpClientConnectionManager$1.get(PoolingHttpClientConnectionManager.java:263)
-  at sun.reflect.GeneratedMethodAccessor10.invoke(Unknown Source)
-  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
-  at java.lang.reflect.Method.invoke(Method.java:498)
-  at com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70)
-  at com.amazonaws.http.conn.$Proxy15.get(Unknown Source)
-  at com.amazonaws.thirdparty.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:190)
-  at com.amazonaws.thirdparty.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
-  at com.amazonaws.thirdparty.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
-  at com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
-  at com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
-  at com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1190)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
-  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
-  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
-  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
-  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4168)
-  at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1249)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:1162)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:2022)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.checkPathForDirectory(S3AFileSystem.java:1857)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.innerMkdirs(S3AFileSystem.java:1890)
-  at org.apache.hadoop.fs.s3a.S3AFileSystem.mkdirs(S3AFileSystem.java:1826)
-  at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2230)
-```
-
-This is the same problem as the previous one, exhibited differently.
 
 ### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient`
 
 ```
-[s3a-transfer-shared-pool4-t6] INFO  http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond
+[s3a-transfer-shared-pool4-t6] INFO  http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496))
+ - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond
 org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond
   at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
   at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
@@ -606,6 +853,45 @@ will attempt to retry the operation; it may just be a transient event. If there
 are many such exceptions in logs, it may be a symptom of connectivity or network
 problems.
 
+### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible
+
+```
+ Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on :
+  com.amazonaws.services.s3.model.AmazonS3Exception:
+   The unspecified location constraint is incompatible for the region specific
+    endpoint this request was sent to.
+    (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
+
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178)
+  at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64)
+  at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451)
+  at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128)
+  at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373)
+  at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
+  at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
+  at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429)
+  at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106)
+  at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91)
+  ...
+  Cause: com.amazonaws.services.s3.model.AmazonS3Exception:
+   The unspecified location constraint is incompatible for the region specific endpoint
+   this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
+   Request ID: EEBC5A08BCB3A645)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
+  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
+  ...
+```
+
+Something has been trying to write data to "/".
+
 ## File System Semantics
 
 These are the issues where S3 does not appear to behave the way a filesystem
@@ -664,7 +950,7 @@ that it is not there)
 This is a visible sign of updates to the metadata server lagging
 behind the state of the underlying filesystem.
 
-Fix: Use S3Guard
+Fix: Use [S3Guard](s3guard.html).
 
 
 ### File not visible/saved
@@ -686,26 +972,74 @@ and the like. The standard strategy here is to save to HDFS and then copy to S3.
 
 ## <a name="encryption"></a> S3 Server Side Encryption
 
-### Using SSE-KMS "Invalid arn"
+### `AWSS3IOException` `KMS.NotFoundException` "Invalid arn" when using SSE-KMS
 
 When performing file operations, the user may run into an issue where the KMS
 key arn is invalid.
+
 ```
-com.amazonaws.services.s3.model.AmazonS3Exception:
-Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: 708284CF60EE233F),
-S3 Extended Request ID: iHUUtXUSiNz4kv3Bdk/hf9F+wjPt8GIVvBHx/HEfCBYkn7W6zmpvbA3XT7Y5nTzcZtfuhcqDunw=:
-Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: 708284CF60EE233F)
+org.apache.hadoop.fs.s3a.AWSS3IOException: innerMkdirs on /test:
+ com.amazonaws.services.s3.model.AmazonS3Exception:
+  Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException;
+   Request ID: CA89F276B3394565),
+   S3 Extended Request ID: ncz0LWn8zor1cUO2fQ7gc5eyqOk3YfyQLDn2OQNoe5Zj/GqDLggUYz9QY7JhdZHdBaDTh+TL5ZQ=:
+   Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: CA89F276B3394565)
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:194)
+  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:117)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.mkdirs(S3AFileSystem.java:1541)
+  at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2230)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.mkdirs(AbstractFSContractTestBase.java:338)
+  at org.apache.hadoop.fs.contract.AbstractFSContractTestBase.setup(AbstractFSContractTestBase.java:193)
+  at org.apache.hadoop.fs.s3a.scale.S3AScaleTestBase.setup(S3AScaleTestBase.java:90)
+  at org.apache.hadoop.fs.s3a.scale.AbstractSTestS3AHugeFiles.setup(AbstractSTestS3AHugeFiles.java:77)
+  at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
+  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+  at java.lang.reflect.Method.invoke(Method.java:498)
+  at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
+  at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
+  at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
+  at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
+  at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
+  at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
+  at org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
+Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
+ Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: CA89F276B3394565)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
+  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
+  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
+  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4168)
+  at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1718)
+  at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:133)
+  at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:125)
+  at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:143)
+  at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:48)
+  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
+  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
+  at java.lang.Thread.run(Thread.java:745)
 ```
 
-This is due to either, the KMS key id is entered incorrectly, or the KMS key id
-is in a different region than the S3 bucket being used.
+Possible causes:
+
+* the KMS key ARN is entered incorrectly, or
+* the KMS key referenced by the ARN is in a different region than the S3 bucket
+being used.
+
 
 ### Using SSE-C "Bad Request"
 
 When performing file operations the user may run into an unexpected 400/403
 error such as
 ```
-org.apache.hadoop.fs.s3a.AWSS3IOException: getFileStatus on fork-4/: com.amazonaws.services.s3.model.AmazonS3Exception:
+org.apache.hadoop.fs.s3a.AWSS3IOException: getFileStatus on fork-4/:
+ com.amazonaws.services.s3.model.AmazonS3Exception:
 Bad Request (Service: Amazon S3; Status Code: 400;
 Error Code: 400 Bad Request; Request ID: 42F9A1987CB49A99),
 S3 Extended Request ID: jU2kcwaXnWj5APB14Cgb1IKkc449gu2+dhIsW/+7x9J4D+VUkKvu78mBo03oh9jnOT2eoTLdECU=:
@@ -719,83 +1053,49 @@ is used, no encryption is specified, or the SSE-C specified is incorrect.
 2. A directory is encrypted with a SSE-C keyA and the user is trying to move a
 file using configured SSE-C keyB into that structure.
 
-## <a name="performance"></a> Performance
-
-S3 is slower to read data than HDFS, even on virtual clusters running on
-Amazon EC2.
-
-* HDFS replicates data for faster query performance.
-* HDFS stores the data on the local hard disks, avoiding network traffic
- if the code can be executed on that host. As EC2 hosts often have their
- network bandwidth throttled, this can make a tangible difference.
-* HDFS is significantly faster for many "metadata" operations: listing
-the contents of a directory, calling `getFileStatus()` on path,
-creating or deleting directories. (S3Guard reduces but does not eliminate
-the speed gap).
-* On HDFS, Directory renames and deletes are `O(1)` operations. On
-S3 renaming is a very expensive `O(data)` operation which may fail partway through
-in which case the final state depends on where the copy+ delete sequence was when it failed.
-All the objects are copied, then the original set of objects are deleted, so
-a failure should not lose data —it may result in duplicate datasets.
-* Unless fast upload enabled, the write only begins on a `close()` operation.
-This can take so long that some applications can actually time out.
-* File IO involving many seek calls/positioned read calls will encounter
-performance problems due to the size of the HTTP requests made. Enable the
-"random" fadvise policy to alleviate this at the
-expense of sequential read performance and bandwidth.
-
-The slow performance of `rename()` surfaces during the commit phase of work,
-including
-
-* The MapReduce `FileOutputCommitter`. This also used by Apache Spark.
-* DistCp's rename-after-copy operation.
-* The `hdfs fs -rm` command renaming the file under `.Trash` rather than
-deleting it. Use `-skipTrash` to eliminate that step.
-
-These operations can be significantly slower when S3 is the destination
-compared to HDFS or other "real" filesystem.
+## <a name="not_all_bytes_were_read"></a> Message appears in logs "Not all bytes were read from the S3ObjectInputStream"
 
-*Improving S3 load-balancing behavior*
 
-Amazon S3 uses a set of front-end servers to provide access to the underlying data.
-The choice of which front-end server to use is handled via load-balancing DNS
-service: when the IP address of an S3 bucket is looked up, the choice of which
-IP address to return to the client is made based on the the current load
-of the front-end servers.
+This is a message which can be generated by the Amazon SDK when the client
+application calls `abort()` on the HTTP input stream, rather than reading to the
+end of the file/stream before calling `close()`. The S3A client does call
+`abort()` when seeking round large files,
+[so leading to the message](https://github.com/aws/aws-sdk-java/issues/1211).
 
-Over time, the load across the front-end changes, so those servers considered
-"lightly loaded" will change. If the DNS value is cached for any length of time,
-your application may end up talking to an overloaded server. Or, in the case
-of failures, trying to talk to a server that is no longer there.
+No ASF Hadoop releases have shipped with an SDK which prints this message
+when used by the S3A client. However, third-party and private builds of Hadoop
+may cause the message to be logged.
 
-And by default, for historical security reasons in the era of applets,
-the DNS TTL of a JVM is "infinity".
+Ignore it. The S3A client does call `abort()`, but that's because our benchmarking
+shows that it is generally more efficient to abort the TCP connection and initiate
+a new one than read to the end of a large file.
 
-To work with AWS better, set the DNS time-to-live of an application which
-works with S3 to something lower. See [AWS documentation](http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/java-dg-jvm-ttl.html).
+Note: the threshold at which data is read to the end of the stream rather than
+the stream aborted can be tuned with `fs.s3a.readahead.range`; the seek policy
+is set in `fs.s3a.experimental.input.fadvise`.
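+
+A minimal sketch of tuning both; the values are illustrative, not recommendations:
+
+```xml
+<property>
+  <name>fs.s3a.readahead.range</name>
+  <!-- Forward seeks within this range read and discard data instead of aborting. -->
+  <value>256K</value>
+</property>
+
+<property>
+  <name>fs.s3a.experimental.input.fadvise</name>
+  <!-- Input seek policy: "normal", "sequential" or "random". -->
+  <value>random</value>
+</property>
+```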
 
-## <a name="network_performance"></a>Troubleshooting network performance
+### <a name="no_such_bucket"></a> `FileNotFoundException` Bucket does not exist.
 
-An example of this is covered in [HADOOP-13871](https://issues.apache.org/jira/browse/HADOOP-13871).
+The bucket does not exist.
 
-1. For public data, use `curl`:
-
-        curl -O https://landsat-pds.s3.amazonaws.com/scene_list.gz
-1. Use `nettop` to monitor a processes connections.
-
-Consider reducing the connection timeout of the s3a connection.
-
-```xml
-<property>
-  <name>fs.s3a.connection.timeout</name>
-  <value>15000</value>
-</property>
 ```
-This *may* cause the client to react faster to network pauses, so display
-stack traces fast. At the same time, it may be less resilient to
-connectivity problems.
+java.io.FileNotFoundException: Bucket stevel45r56666 does not exist
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:361)
+  at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:293)
+  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3288)
+  at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:123)
+  at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3337)
+  at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3311)
+  at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:529)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo.run(S3GuardTool.java:997)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:309)
+  at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:1218)
+  at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.main(S3GuardTool.java:1227)
+```
 
 
+Check the URI. If using a third-party store, verify that you've configured
+the client to talk to the specific server in `fs.s3a.endpoint`.
+
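+A hedged sketch of pointing the client at a specific store; the hostname below
+is a placeholder, not a real endpoint:
+
+```xml
+<property>
+  <name>fs.s3a.endpoint</name>
+  <!-- Endpoint of the store which hosts the bucket. -->
+  <value>storage.example.org</value>
+</property>
+```
+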
 ## Other Issues
 
 ### <a name="logging"></a> Enabling low-level logging
@@ -816,7 +1116,7 @@ log4j.logger.org.apache.http=DEBUG
 ```
 
 
-This produces a log such as this, wich is for a V4-authenticated PUT of a 0-byte file used
+This produces a log such as this, which is for a V4-authenticated PUT of a 0-byte file used
 as an empty directory marker
 
 ```
@@ -866,9 +1166,9 @@ execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be
 
 ## <a name="retries"></a>  Reducing failures by configuring retry policy
 
-The S3A client can ba configured to rety those operations which are considered
-retriable. That can be because they are idempotent, or
-because there failure happened before the request was processed by S3.
+The S3A client can be configured to retry those operations which are considered
+retryable. That can be because they are idempotent, or
+because the failure happened before the request was processed by S3.
 
 The number of retries and interval between each retry can be configured:
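+
+A minimal sketch, assuming the `fs.s3a.retry.limit` and `fs.s3a.retry.interval`
+options; the values shown are illustrative, not recommendations:
+
+```xml
+<property>
+  <name>fs.s3a.retry.limit</name>
+  <!-- Number of times to retry a retryable request. -->
+  <value>7</value>
+</property>
+
+<property>
+  <name>fs.s3a.retry.interval</name>
+  <!-- Initial interval between retry attempts. -->
+  <value>500ms</value>
+</property>
+```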
 
@@ -893,8 +1193,8 @@ Not all failures are retried. Specifically excluded are those considered
 unrecoverable:
 
 * Low-level networking: `UnknownHostException`, `NoRouteToHostException`.
-* 302 redirects
-* Missing resources, 404/`FileNotFoundException`
+* 302 redirects.
+* Missing resources, 404/`FileNotFoundException`.
 * HTTP 416 response/`EOFException`. This can surface if the length of a file changes
   while another client is reading it.
 * Failures during execution or result processing of non-idempotent operations where
@@ -910,79 +1210,6 @@ be idempotent, and will retry them on failure. These are only really idempotent
 if no other client is attempting to manipulate the same objects, such as:
 renaming() the directory tree or uploading files to the same location.
 Please don't do that. Given that the emulated directory rename and delete operations
-aren't atomic, even without retries, multiple S3 clients working with the same
+are not atomic, even without retries, multiple S3 clients working with the same
 paths can interfere with each other
 
-#### <a name="retries"></a> Throttling
-
-When many requests are made of a specific S3 bucket (or shard inside it),
-S3 will respond with a 503 "throttled" response.
-Throttling can be recovered from, provided overall load decreases.
-Furthermore, because it is sent before any changes are made to the object store,
-is inherently idempotent. For this reason, the client will always attempt to
-retry throttled requests.
-
-The limit of the number of times a throttled request can be retried,
-and the exponential interval increase between attempts, can be configured
-independently of the other retry limits.
-
-```xml
-<property>
-  <name>fs.s3a.retry.throttle.limit</name>
-  <value>20</value>
-  <description>
-    Number of times to retry any throttled request.
-  </description>
-</property>
-
-<property>
-  <name>fs.s3a.retry.throttle.interval</name>
-  <value>500ms</value>
-  <description>
-    Interval between retry attempts on throttled requests.
-  </description>
-</property>
-```
-
-If a client is failing due to `AWSServiceThrottledException` failures,
-increasing the interval and limit *may* address this. However, it
-it is a sign of AWS services being overloaded by the sheer number of clients
-and rate of requests. Spreading data across different buckets, and/or using
-a more balanced directory structure may be beneficial.
-Consult [the AWS documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).
-
-Reading or writing data encrypted with SSE-KMS forces S3 to make calls of
-the AWS KMS Key Management Service, which comes with its own
-[Request Rate Limits](http://docs.aws.amazon.com/kms/latest/developerguide/limits.html).
-These default to 1200/second for an account, across all keys and all uses of
-them, which, for S3 means: across all buckets with data encrypted with SSE-KMS.
-
-###### Tips to Keep Throttling down
-
-* If you are seeing a lot of throttling responses on a large scale
-operation like a `distcp` copy, *reduce* the number of processes trying
-to work with the bucket (for distcp: reduce the number of mappers with the
-`-m` option).
-
-* If you are reading or writing lists of files, if you can randomize
-the list so they are not processed in a simple sorted order, you may
-reduce load on a specific shard of S3 data, so potentially increase throughput.
-
-* An S3 Bucket is throttled by requests coming from all
-simultaneous clients. Different applications and jobs may interfere with
-each other: consider that when troubleshooting.
-Partitioning data into different buckets may help isolate load here.
-
-* If you are using data encrypted with SSE-KMS, then the
-will also apply: these are stricter than the S3 numbers.
-If you believe that you are reaching these limits, you may be able to
-get them increased.
-Consult [the KMS Rate Limit documentation](http://docs.aws.amazon.com/kms/latest/developerguide/limits.html).
-
-* S3Guard uses DynamoDB for directory and file lookups;
-it is rate limited to the amount of (guaranteed) IO purchased for a
-table. If significant throttling events/rate is observed here, the preallocated
-IOPs can be increased with the `s3guard set-capacity` command, or
-through the AWS Console. Throttling events in S3Guard are noted in logs, and
-also in the S3A metrics `s3guard_metadatastore_throttle_rate` and
-`s3guard_metadatastore_throttled`.




[17/21] hadoop git commit: MAPREDUCE-7053: Timed out tasks can fail to produce thread dump. Contributed by Jason Lowe.

Posted by ha...@apache.org.
MAPREDUCE-7053: Timed out tasks can fail to produce thread dump. Contributed by Jason Lowe.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/82f029f7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/82f029f7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/82f029f7

Branch: refs/heads/HDFS-12996
Commit: 82f029f7b50679ea477a3a898e4ee400fa394adf
Parents: a1e05e0
Author: Eric Payne <ep...@apache.org>
Authored: Fri Feb 16 08:15:09 2018 -0600
Committer: Eric Payne <ep...@apache.org>
Committed: Fri Feb 16 08:15:09 2018 -0600

----------------------------------------------------------------------
 .../hadoop/mapred/TaskAttemptListenerImpl.java  | 15 +++--
 .../mapreduce/v2/app/TaskHeartbeatHandler.java  | 67 ++++++++++++++------
 .../mapred/TestTaskAttemptListenerImpl.java     | 54 ++++++++++++++--
 .../v2/app/TestTaskHeartbeatHandler.java        | 38 +++++++++++
 4 files changed, 145 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/82f029f7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
index 668d8ed..b04dac5 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
@@ -370,17 +370,22 @@ public class TaskAttemptListenerImpl extends CompositeService
         TypeConverter.toYarn(taskAttemptID);
 
     AMFeedback feedback = new AMFeedback();
+    feedback.setTaskFound(true);
+
     AtomicReference<TaskAttemptStatus> lastStatusRef =
         attemptIdToStatus.get(yarnAttemptID);
     if (lastStatusRef == null) {
-      LOG.error("Status update was called with illegal TaskAttemptId: "
-          + yarnAttemptID);
-      feedback.setTaskFound(false);
+      // The task is not known, but it could be in the process of tearing
+      // down gracefully or receiving a thread dump signal. Tolerate unknown
+      // tasks as long as they have unregistered recently.
+      if (!taskHeartbeatHandler.hasRecentlyUnregistered(yarnAttemptID)) {
+        LOG.error("Status update was called with illegal TaskAttemptId: "
+            + yarnAttemptID);
+        feedback.setTaskFound(false);
+      }
       return feedback;
     }
 
-    feedback.setTaskFound(true);
-
     // Propagating preemption to the task if TASK_PREEMPTION is enabled
     if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false)
         && preemptionPolicy.isPreempted(yarnAttemptID)) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/82f029f7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
index c438b35..f8f5015 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
@@ -71,12 +71,14 @@ public class TaskHeartbeatHandler extends AbstractService {
   private Thread lostTaskCheckerThread;
   private volatile boolean stopped;
   private long taskTimeOut;
+  private long unregisterTimeOut;
   private int taskTimeOutCheckInterval = 30 * 1000; // 30 seconds.
 
   private final EventHandler eventHandler;
   private final Clock clock;
   
   private ConcurrentMap<TaskAttemptId, ReportTime> runningAttempts;
+  private ConcurrentMap<TaskAttemptId, ReportTime> recentlyUnregisteredAttempts;
 
   public TaskHeartbeatHandler(EventHandler eventHandler, Clock clock,
       int numThreads) {
@@ -85,6 +87,8 @@ public class TaskHeartbeatHandler extends AbstractService {
     this.clock = clock;
     runningAttempts =
       new ConcurrentHashMap<TaskAttemptId, ReportTime>(16, 0.75f, numThreads);
+    recentlyUnregisteredAttempts =
+        new ConcurrentHashMap<TaskAttemptId, ReportTime>(16, 0.75f, numThreads);
   }
 
   @Override
@@ -92,6 +96,8 @@ public class TaskHeartbeatHandler extends AbstractService {
     super.serviceInit(conf);
     taskTimeOut = conf.getLong(
         MRJobConfig.TASK_TIMEOUT, MRJobConfig.DEFAULT_TASK_TIMEOUT_MILLIS);
+    unregisterTimeOut = conf.getLong(MRJobConfig.TASK_EXIT_TIMEOUT,
+        MRJobConfig.TASK_EXIT_TIMEOUT_DEFAULT);
 
     // enforce task timeout is at least twice as long as task report interval
     long taskProgressReportIntervalMillis = MRJobConfUtil.
@@ -140,6 +146,12 @@ public class TaskHeartbeatHandler extends AbstractService {
 
   public void unregister(TaskAttemptId attemptID) {
     runningAttempts.remove(attemptID);
+    recentlyUnregisteredAttempts.put(attemptID,
+        new ReportTime(clock.getTime()));
+  }
+
+  public boolean hasRecentlyUnregistered(TaskAttemptId attemptID) {
+    return recentlyUnregisteredAttempts.containsKey(attemptID);
   }
 
   private class PingChecker implements Runnable {
@@ -147,27 +159,9 @@ public class TaskHeartbeatHandler extends AbstractService {
     @Override
     public void run() {
       while (!stopped && !Thread.currentThread().isInterrupted()) {
-        Iterator<Map.Entry<TaskAttemptId, ReportTime>> iterator =
-            runningAttempts.entrySet().iterator();
-
-        // avoid calculating current time everytime in loop
         long currentTime = clock.getTime();
-
-        while (iterator.hasNext()) {
-          Map.Entry<TaskAttemptId, ReportTime> entry = iterator.next();
-          boolean taskTimedOut = (taskTimeOut > 0) &&
-              (currentTime > (entry.getValue().getLastProgress() + taskTimeOut));
-           
-          if(taskTimedOut) {
-            // task is lost, remove from the list and raise lost event
-            iterator.remove();
-            eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(entry
-                .getKey(), "AttemptID:" + entry.getKey().toString()
-                + " Timed out after " + taskTimeOut / 1000 + " secs"));
-            eventHandler.handle(new TaskAttemptEvent(entry.getKey(),
-                TaskAttemptEventType.TA_TIMED_OUT));
-          }
-        }
+        checkRunning(currentTime);
+        checkRecentlyUnregistered(currentTime);
         try {
           Thread.sleep(taskTimeOutCheckInterval);
         } catch (InterruptedException e) {
@@ -176,6 +170,39 @@ public class TaskHeartbeatHandler extends AbstractService {
         }
       }
     }
+
+    private void checkRunning(long currentTime) {
+      Iterator<Map.Entry<TaskAttemptId, ReportTime>> iterator =
+          runningAttempts.entrySet().iterator();
+
+      while (iterator.hasNext()) {
+        Map.Entry<TaskAttemptId, ReportTime> entry = iterator.next();
+        boolean taskTimedOut = (taskTimeOut > 0) &&
+            (currentTime > (entry.getValue().getLastProgress() + taskTimeOut));
+
+        if(taskTimedOut) {
+          // task is lost, remove from the list and raise lost event
+          iterator.remove();
+          eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(entry
+              .getKey(), "AttemptID:" + entry.getKey().toString()
+              + " Timed out after " + taskTimeOut / 1000 + " secs"));
+          eventHandler.handle(new TaskAttemptEvent(entry.getKey(),
+              TaskAttemptEventType.TA_TIMED_OUT));
+        }
+      }
+    }
+
+    private void checkRecentlyUnregistered(long currentTime) {
+      Iterator<ReportTime> iterator =
+          recentlyUnregisteredAttempts.values().iterator();
+      while (iterator.hasNext()) {
+        ReportTime unregisteredTime = iterator.next();
+        if (currentTime >
+            unregisteredTime.getLastProgress() + unregisterTimeOut) {
+          iterator.remove();
+        }
+      }
+    }
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/hadoop/blob/82f029f7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
index da7421b..068ebfa 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
@@ -17,6 +17,7 @@
 */
 package org.apache.hadoop.mapred;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.mapred.Counters.Counter;
 import org.apache.hadoop.mapreduce.checkpoint.EnumCounter;
 
@@ -51,11 +52,13 @@ import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
 import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
 import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
 import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.util.ControlledClock;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.junit.After;
 import org.junit.Test;
@@ -488,14 +491,57 @@ public class TestTaskAttemptListenerImpl {
   }
 
   @Test
-  public void testStatusUpdateFromUnregisteredTask()
-      throws IOException, InterruptedException{
+  public void testStatusUpdateFromUnregisteredTask() throws Exception {
     configureMocks();
-    startListener(false);
+    ControlledClock clock = new ControlledClock();
+    clock.setTime(0);
+    doReturn(clock).when(appCtx).getClock();
+
+    final TaskAttemptListenerImpl tal = new TaskAttemptListenerImpl(appCtx,
+        secret, rmHeartbeatHandler, policy) {
+      @Override
+      protected void startRpcServer() {
+        // Empty
+      }
+      @Override
+      protected void stopRpcServer() {
+        // Empty
+      }
+    };
 
-    AMFeedback feedback = listener.statusUpdate(attemptID, firstReduceStatus);
+    Configuration conf = new Configuration();
+    conf.setLong(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 1);
+    tal.init(conf);
+    tal.start();
 
+    AMFeedback feedback = tal.statusUpdate(attemptID, firstReduceStatus);
     assertFalse(feedback.getTaskFound());
+    tal.registerPendingTask(task, wid);
+    tal.registerLaunchedTask(attemptId, wid);
+    feedback = tal.statusUpdate(attemptID, firstReduceStatus);
+    assertTrue(feedback.getTaskFound());
+
+    // verify attempt is still reported as found if recently unregistered
+    tal.unregister(attemptId, wid);
+    feedback = tal.statusUpdate(attemptID, firstReduceStatus);
+    assertTrue(feedback.getTaskFound());
+
+    // verify attempt is not found if not recently unregistered
+    long unregisterTimeout = conf.getLong(MRJobConfig.TASK_EXIT_TIMEOUT,
+        MRJobConfig.TASK_EXIT_TIMEOUT_DEFAULT);
+    clock.setTime(unregisterTimeout + 1);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        try {
+          AMFeedback response =
+              tal.statusUpdate(attemptID, firstReduceStatus);
+          return !response.getTaskFound();
+        } catch (Exception e) {
+          throw new RuntimeException("status update failed", e);
+        }
+      }
+    }, 10, 10000);
   }
 
   private void configureMocks() {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/82f029f7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestTaskHeartbeatHandler.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestTaskHeartbeatHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestTaskHeartbeatHandler.java
index 2623849..5d86479 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestTaskHeartbeatHandler.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestTaskHeartbeatHandler.java
@@ -23,6 +23,7 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
@@ -30,10 +31,12 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
 import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ControlledClock;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.junit.Assert;
 import org.junit.Test;
@@ -105,6 +108,41 @@ public class TestTaskHeartbeatHandler {
     verifyTaskTimeoutConfig(conf, expectedTimeout);
   }
 
+  @Test
+  public void testTaskUnregistered() throws Exception {
+    EventHandler mockHandler = mock(EventHandler.class);
+    ControlledClock clock = new ControlledClock();
+    clock.setTime(0);
+    final TaskHeartbeatHandler hb =
+        new TaskHeartbeatHandler(mockHandler, clock, 1);
+    Configuration conf = new Configuration();
+    conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 1);
+    hb.init(conf);
+    hb.start();
+    try {
+      ApplicationId appId = ApplicationId.newInstance(0l, 5);
+      JobId jobId = MRBuilderUtils.newJobId(appId, 4);
+      TaskId tid = MRBuilderUtils.newTaskId(jobId, 3, TaskType.MAP);
+      final TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 2);
+      Assert.assertFalse(hb.hasRecentlyUnregistered(taid));
+      hb.register(taid);
+      Assert.assertFalse(hb.hasRecentlyUnregistered(taid));
+      hb.unregister(taid);
+      Assert.assertTrue(hb.hasRecentlyUnregistered(taid));
+      long unregisterTimeout = conf.getLong(MRJobConfig.TASK_EXIT_TIMEOUT,
+          MRJobConfig.TASK_EXIT_TIMEOUT_DEFAULT);
+      clock.setTime(unregisterTimeout + 1);
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          return !hb.hasRecentlyUnregistered(taid);
+        }
+      }, 10, 10000);
+    } finally {
+      hb.stop();
+    }
+  }
+
   /**
    * Test if task timeout is set properly in response to the configuration of
    * the task progress report interval.




[09/21] hadoop git commit: MAPREDUCE-7052. TestFixedLengthInputFormat#testFormatCompressedIn is flaky. Contributed by Peter Bacsko

Posted by ha...@apache.org.
MAPREDUCE-7052. TestFixedLengthInputFormat#testFormatCompressedIn is flaky. Contributed by Peter Bacsko


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a53d62ab
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a53d62ab
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a53d62ab

Branch: refs/heads/HDFS-12996
Commit: a53d62ab26e170a0338f93e228718da52e9196e4
Parents: da59acd
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Feb 15 15:12:57 2018 -0600
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Feb 15 15:12:57 2018 -0600

----------------------------------------------------------------------
 .../java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a53d62ab/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java
index 4864dd0..5134729 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java
@@ -301,7 +301,7 @@ public class TestFixedLengthInputFormat {
       if (i > 0) {
         if (i == (MAX_TESTS-1)) {
           // Test a split size that is less than record len
-          numSplits = (int)(fileSize/Math.floor(recordLength/2));
+          numSplits = (int)(fileSize/ Math.max(1, Math.floor(recordLength/2)));
         } else {
           if (MAX_TESTS % i == 0) {
             // Let us create a split size that is forced to be 




[19/21] hadoop git commit: HDFS-13153. Enable HDFS diskbalancer by default. Contributed by Ajay Kumar.

Posted by ha...@apache.org.
HDFS-13153. Enable HDFS diskbalancer by default. Contributed by Ajay Kumar.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8d5ea747
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8d5ea747
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8d5ea747

Branch: refs/heads/HDFS-12996
Commit: 8d5ea7470a3225319e3bef5626b837572c2e0d3c
Parents: 7ac8824
Author: Arpit Agarwal <ar...@apache.org>
Authored: Fri Feb 16 09:38:34 2018 -0800
Committer: Arpit Agarwal <ar...@apache.org>
Committed: Fri Feb 16 09:38:34 2018 -0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java      | 2 +-
 .../hadoop-hdfs/src/main/resources/hdfs-default.xml              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8d5ea747/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index c0ad4ec..0828957 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -1146,7 +1146,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   // Disk Balancer Keys
   public static final String DFS_DISK_BALANCER_ENABLED =
       "dfs.disk.balancer.enabled";
-  public static final boolean DFS_DISK_BALANCER_ENABLED_DEFAULT = false;
+  public static final boolean DFS_DISK_BALANCER_ENABLED_DEFAULT = true;
 
   public static final String DFS_DISK_BALANCER_MAX_DISK_THROUGHPUT =
       "dfs.disk.balancer.max.disk.throughputInMBperSec";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8d5ea747/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index f6d232e..b61c418 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4633,10 +4633,10 @@
 
   <property>
     <name>dfs.disk.balancer.enabled</name>
-    <value>false</value>
+    <value>true</value>
     <description>
         This enables the diskbalancer feature on a cluster. By default, disk
-      balancer is disabled.
+      balancer is enabled.
     </description>
   </property>
 




[14/21] hadoop git commit: YARN-7292. Retrospect Resource Profile Behavior for overriding capability. Contributed by Wangda Tan.

Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestProfileCapability.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestProfileCapability.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestProfileCapability.java
deleted file mode 100644
index cbad3f4..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestProfileCapability.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.api;
-
-import com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.api.records.ResourceInformation;
-import org.apache.hadoop.yarn.util.resource.ResourceUtils;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Test profile capability behavior.
- */
-public class TestProfileCapability {
-  @Before
-  public void setup() {
-    // Initialize resource map
-    Map<String, ResourceInformation> riMap = new HashMap<>();
-
-    // Initialize mandatory resources
-    riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB);
-    riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES);
-
-    for (int i = 0; i < 5; i++) {
-      String resourceName = "res-" + i;
-      riMap.put(resourceName, ResourceInformation
-          .newInstance(resourceName, "", 0, ResourceTypes.COUNTABLE, 0,
-              Integer.MAX_VALUE));
-    }
-
-    ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
-  }
-
-  @Test
-  public void testConvertProfileCapabilityToResource() {
-    Resource profile1 = Resource.newInstance(1, 1);
-    profile1.setResourceValue("res-0", 1);
-    profile1.setResourceValue("res-1", 1);
-
-    Resource profile2 = Resource.newInstance(2, 2);
-    profile2.setResourceValue("res-0", 2);
-    profile2.setResourceValue("res-1", 2);
-
-    Resource profile3 = Resource.newInstance(3, 3);
-    profile3.setResourceValue("res-0", 3);
-    profile3.setResourceValue("res-1", 3);
-
-    Map<String, Resource> profiles = ImmutableMap.of("profile1", profile1,
-        "profile2", profile2, "profile3", profile3, "default", profile1);
-
-    // Test case 1, set override value to (1, 1, 0), since we only allow
-    // overwrite for positive value, it is still profile1.
-    ProfileCapability pc = ProfileCapability.newInstance("profile1",
-        Resource.newInstance(1, 1));
-    Assert.assertEquals(profile1, ProfileCapability.toResource(pc, profiles));
-
-    // Test case 2, similarly, negative value won't be respected.
-    pc = ProfileCapability.newInstance("profile1",
-        Resource.newInstance(1, -1));
-    Assert.assertEquals(profile1, ProfileCapability.toResource(pc, profiles));
-
-    // Test case 3, do overwrite for memory and vcores, the result is (3,3,1,1)
-    Resource expected = Resource.newInstance(3, 3);
-    expected.setResourceValue("res-0", 1);
-    expected.setResourceValue("res-1", 1);
-    pc = ProfileCapability.newInstance("profile1",
-        Resource.newInstance(3, 3));
-    Assert.assertEquals(expected, ProfileCapability.toResource(pc, profiles));
-
-    // Test case 3, do overwrite for mem and res-1, the result is (3,1,3,1)
-    expected = Resource.newInstance(3, 1);
-    expected.setResourceValue("res-0", 3);
-    expected.setResourceValue("res-1", 1);
-
-    Resource overwrite = Resource.newInstance(3, 0);
-    overwrite.setResourceValue("res-0", 3);
-    overwrite.setResourceValue("res-1", 0);
-
-    pc = ProfileCapability.newInstance("profile1", overwrite);
-    Assert.assertEquals(expected, ProfileCapability.toResource(pc, profiles));
-
-    // Test case 4, when null profile is specified, use default.
-    pc = ProfileCapability.newInstance("", null);
-    Assert.assertEquals(profile1, ProfileCapability.toResource(pc, profiles));
-  }
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
index 83f912f..4ba1bdf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
@@ -384,7 +384,6 @@ public class BuilderUtils {
     request.setNumContainers(r.getNumContainers());
     request.setNodeLabelExpression(r.getNodeLabelExpression());
     request.setExecutionTypeRequest(r.getExecutionTypeRequest());
-    request.setProfileCapability(r.getProfileCapability());
     return request;
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
index 4934243..33451295 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
@@ -47,7 +47,6 @@ import org.apache.hadoop.yarn.api.records.ContainerUpdateType;
 import org.apache.hadoop.yarn.api.records.ExecutionType;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeState;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest;
@@ -67,7 +66,6 @@ import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.security.YarnAuthorizationProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt
     .RMAppAttemptState;
@@ -627,37 +625,6 @@ public class RMServerUtils {
     }
   }
 
-  public static void convertProfileToResourceCapability(ResourceRequest ask,
-      Configuration conf, ResourceProfilesManager resourceProfilesManager)
-      throws YarnException {
-
-    if (LOG_HANDLE.isDebugEnabled()) {
-      LOG_HANDLE
-          .debug("Converting profile to resource capability for ask " + ask);
-    }
-
-    boolean profilesEnabled =
-        conf.getBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED,
-            YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED);
-    if (!profilesEnabled) {
-      if (ask.getProfileCapability() != null && !ask.getProfileCapability()
-          .getProfileCapabilityOverride().equals(Resources.none())) {
-        ask.setCapability(
-            ask.getProfileCapability().getProfileCapabilityOverride());
-      }
-    } else {
-      if (ask.getProfileCapability() != null) {
-        ask.setCapability(ProfileCapability
-            .toResource(ask.getProfileCapability(),
-                resourceProfilesManager.getResourceProfiles()));
-      }
-    }
-    if (LOG_HANDLE.isDebugEnabled()) {
-      LOG_HANDLE
-          .debug("Converted profile to resource capability for ask " + ask);
-    }
-  }
-
   public static Long getOrDefault(Map<String, Long> map, String key,
       Long defaultValue) {
     if (map.containsKey(key)) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
index 0080a29..27563d6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
@@ -40,12 +40,10 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
 import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
-import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.security.AccessType;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
@@ -270,14 +268,6 @@ public class SchedulerUtils {
   private static void validateResourceRequest(ResourceRequest resReq,
       Resource maximumResource, QueueInfo queueInfo, RMContext rmContext)
       throws InvalidResourceRequestException {
-    try {
-      RMServerUtils.convertProfileToResourceCapability(resReq,
-          rmContext.getYarnConfiguration(),
-          rmContext.getResourceProfilesManager());
-    } catch (YarnException ye) {
-      throw new InvalidResourceRequestException(ye);
-    }
-
     Resource requestedResource = resReq.getCapability();
     for (int i = 0; i < ResourceUtils.getNumberOfKnownResourceTypes(); i++) {
       ResourceInformation reqRI = requestedResource.getResourceInformation(i);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
index ceaf236..90e4be8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
@@ -44,7 +44,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRespons
 import org.apache.hadoop.yarn.api.protocolrecords
     .RegisterApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -52,7 +51,6 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerUpdateType;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
@@ -64,8 +62,6 @@ import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.MockResourceProfileManager;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
@@ -73,13 +69,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptS
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/MockResourceProfileManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/MockResourceProfileManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/MockResourceProfileManager.java
deleted file mode 100644
index b75f925..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/MockResourceProfileManager.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.resourcemanager.resource;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.util.resource.ResourceUtils;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Mock ResourceProfileManager for unit test.
- */
-public class MockResourceProfileManager extends ResourceProfilesManagerImpl {
-  private Map<String, Resource> profiles;
-  private boolean featureEnabled;
-
-  public MockResourceProfileManager(Map<String, Resource> profiles) {
-    this.profiles = new HashMap<>();
-    this.profiles.putAll(profiles);
-
-    // Set minimum / maximum allocation so test doesn't need to add them
-    // every time.
-    this.profiles.put(ResourceProfilesManagerImpl.MINIMUM_PROFILE,
-        ResourceUtils.getResourceTypesMinimumAllocation());
-    this.profiles.put(ResourceProfilesManagerImpl.MAXIMUM_PROFILE,
-        ResourceUtils.getResourceTypesMaximumAllocation());
-  }
-
-  @Override
-  public void init(Configuration config) throws IOException {
-    this.featureEnabled = config.getBoolean(
-        YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED,
-        YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED);
-  }
-
-  @Override
-  public Resource getProfile(String profile) throws YarnException {
-    if (!featureEnabled) {
-      throw new YARNFeatureNotEnabledException("");
-    }
-    return profiles.get(profile);
-  }
-
-  @Override
-  public Map<String, Resource> getResourceProfiles()
-      throws YARNFeatureNotEnabledException {
-    if (!featureEnabled) {
-      throw new YARNFeatureNotEnabledException("");
-    }
-    return profiles;
-  }
-
-  @Override
-  public void reloadProfiles() throws IOException {
-    throw new IOException("Not supported");
-  }
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aae62991/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java
index ea29f7f..3a8d33a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java
@@ -18,27 +18,13 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
-import com.google.common.collect.ImmutableMap;
-import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
-import org.apache.hadoop.yarn.api.records.ProfileCapability;
-import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceInformation;
-import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
-import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
-import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
 import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.MockResourceProfileManager;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
-import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.ResourceUtils;
@@ -46,7 +32,6 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -55,101 +40,6 @@ import java.util.Map;
  */
 public class TestCapacitySchedulerWithMultiResourceTypes {
   private static String RESOURCE_1 = "res1";
-  private final int GB = 1024;
-
-  @Test
-  public void testBasicCapacitySchedulerWithProfile() throws Exception {
-
-    // Initialize resource map
-    Map<String, ResourceInformation> riMap = new HashMap<>();
-
-    // Initialize mandatory resources
-    ResourceInformation memory = ResourceInformation.newInstance(
-        ResourceInformation.MEMORY_MB.getName(),
-        ResourceInformation.MEMORY_MB.getUnits(),
-        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
-        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
-    ResourceInformation vcores = ResourceInformation.newInstance(
-        ResourceInformation.VCORES.getName(),
-        ResourceInformation.VCORES.getUnits(),
-        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
-        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
-    riMap.put(ResourceInformation.MEMORY_URI, memory);
-    riMap.put(ResourceInformation.VCORES_URI, vcores);
-    riMap.put(RESOURCE_1, ResourceInformation.newInstance(RESOURCE_1, "", 0,
-        ResourceTypes.COUNTABLE, 0, Integer.MAX_VALUE));
-
-    ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
-
-    CapacitySchedulerConfiguration csconf =
-        new CapacitySchedulerConfiguration();
-    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root", 100.0f);
-    csconf.setMaximumAMResourcePercentPerPartition("root", "", 100.0f);
-    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root.default",
-        100.0f);
-    csconf.setMaximumAMResourcePercentPerPartition("root.default", "", 100.0f);
-    csconf.setResourceComparator(DominantResourceCalculator.class);
-
-    YarnConfiguration conf = new YarnConfiguration(csconf);
-    // Don't reset resource types since we have already configured resource
-    // types
-    conf.setBoolean(TestResourceProfiles.TEST_CONF_RESET_RESOURCE_TYPES, false);
-    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
-        ResourceScheduler.class);
-    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
-
-    final MockResourceProfileManager mrpm = new MockResourceProfileManager(
-        ImmutableMap.of("res-1", TestUtils
-            .createResource(2 * GB, 2, ImmutableMap.of(RESOURCE_1, 2))));
-
-    MockRM rm = new MockRM(conf) {
-      @Override
-      protected ResourceProfilesManager createResourceProfileManager() {
-        return mrpm;
-      }
-    };
-    rm.start();
-
-    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
-    LeafQueue leafQueue = (LeafQueue) cs.getQueue("default");
-
-    MockNM nm1 = rm.registerNode("h1:1234",
-        TestUtils.createResource(8 * GB, 8, ImmutableMap.of(RESOURCE_1, 8)));
-
-    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
-    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
-
-    Assert.assertEquals(Resource.newInstance(1 * GB, 1),
-        leafQueue.getUsedResources());
-
-    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
-
-    // Now request resource:
-    am1.allocate(Arrays.asList(ResourceRequest.newBuilder().capability(
-        Resource.newInstance(1 * GB, 1)).numContainers(1).resourceName("*")
-            .profileCapability(ProfileCapability
-                .newInstance("res-1",
-                    Resource.newInstance(2 * GB, 2))).build()),
-        null);
-
-    // Do node heartbeats 1 time and check container allocated.
-    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
-
-    // Now used resource = <mem=1GB, vcore=1> + <mem=2GB,vcore=2,res_1=2>
-    Assert.assertEquals(
-        TestUtils.createResource(3 * GB, 3, ImmutableMap.of(RESOURCE_1, 2)),
-        leafQueue.getUsedResources());
-
-    // Acquire container
-    AllocateResponse amResponse = am1.allocate(null, null);
-    Assert.assertFalse(amResponse.getAllocatedContainers().isEmpty());
-    ContainerTokenIdentifier containerTokenIdentifier =
-        BuilderUtils.newContainerTokenIdentifier(
-            amResponse.getAllocatedContainers().get(0).getContainerToken());
-    Assert.assertEquals(
-        TestUtils.createResource(2 * GB, 2, ImmutableMap.of(RESOURCE_1, 2)),
-        containerTokenIdentifier.getResource());
-  }
 
   @Test
   public void testMaximumAllocationRefreshWithMultipleResourceTypes() throws Exception {


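With the profile-override path removed above, an AM simply puts the desired resources on the ResourceRequest itself. A rough sketch of that using the public builder API (the class name and resource values are invented for illustration):

```java
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;

public class PlainResourceRequest {
  public static ResourceRequest newAsk() {
    // 2 GB / 2 vcores, one container, any host; no profile override involved.
    return ResourceRequest.newBuilder()
        .resourceName(ResourceRequest.ANY)
        .capability(Resource.newInstance(2 * 1024, 2))
        .numContainers(1)
        .build();
  }
}
```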


[18/21] hadoop git commit: HADOOP-14507. Extend per-bucket secret key config with explicit getPassword() on fs.s3a.$bucket.secret.key. Contributed by Steve Loughran.

Posted by ha...@apache.org.
HADOOP-14507. Extend per-bucket secret key config with explicit getPassword() on fs.s3a.$bucket.secret.key.
Contributed by Steve Loughran.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7ac88244
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7ac88244
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7ac88244

Branch: refs/heads/HDFS-12996
Commit: 7ac88244c54ce483729af3d2736d9f4731e230ca
Parents: 82f029f
Author: Steve Loughran <st...@apache.org>
Authored: Fri Feb 16 16:37:06 2018 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Fri Feb 16 16:37:06 2018 +0000

----------------------------------------------------------------------
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java |  40 +++--
 .../java/org/apache/hadoop/fs/s3a/S3AUtils.java | 115 ++++++++++---
 .../apache/hadoop/fs/s3a/S3ClientFactory.java   |   4 +-
 .../fs/s3a/SimpleAWSCredentialsProvider.java    |  10 +-
 .../fs/s3a/TemporaryAWSCredentialsProvider.java |  14 +-
 .../s3a/auth/AssumedRoleCredentialProvider.java |   7 +-
 .../src/site/markdown/tools/hadoop-aws/index.md |  62 +++++--
 .../hadoop/fs/s3a/TestSSEConfiguration.java     | 171 ++++++++++++++-----
 .../hadoop/fs/s3a/auth/ITestAssumeRole.java     |  16 +-
 9 files changed, 332 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index da8f38b..53875ba 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -79,7 +79,6 @@ import com.google.common.base.Preconditions;
 import com.google.common.util.concurrent.ListeningExecutorService;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -122,6 +121,8 @@ import static org.apache.hadoop.fs.s3a.Constants.*;
 import static org.apache.hadoop.fs.s3a.Invoker.*;
 import static org.apache.hadoop.fs.s3a.S3AUtils.*;
 import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.commons.lang.StringUtils.isNotBlank;
+import static org.apache.commons.lang.StringUtils.isNotEmpty;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -300,7 +301,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
 
       verifyBucketExists();
 
-      serverSideEncryptionAlgorithm = getEncryptionAlgorithm(conf);
+      serverSideEncryptionAlgorithm = getEncryptionAlgorithm(bucket, conf);
       inputPolicy = S3AInputPolicy.getPolicy(
           conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
       LOG.debug("Input fadvise policy = {}", inputPolicy);
@@ -700,7 +701,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
           bucket,
           pathToKey(f),
           serverSideEncryptionAlgorithm,
-          getServerSideEncryptionKey(getConf())),
+          getServerSideEncryptionKey(bucket, getConf())),
             fileStatus.getLen(),
             s3,
             statistics,
@@ -1217,7 +1218,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
         new GetObjectMetadataRequest(bucket, key);
     //SSE-C requires to be filled in if enabled for object metadata
     if(S3AEncryptionMethods.SSE_C.equals(serverSideEncryptionAlgorithm) &&
-        StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))){
+        isNotBlank(getServerSideEncryptionKey(bucket, getConf()))){
       request.setSSECustomerKey(generateSSECustomerKey());
     }
     ObjectMetadata meta = invoker.retryUntranslated("GET " + key, true,
@@ -1440,7 +1441,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
       ObjectMetadata metadata,
       InputStream inputStream) {
     Preconditions.checkNotNull(inputStream);
-    Preconditions.checkArgument(StringUtils.isNotEmpty(key), "Null/empty key");
+    Preconditions.checkArgument(isNotEmpty(key), "Null/empty key");
     PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key,
         inputStream, metadata);
     setOptionalPutRequestParameters(putObjectRequest);
@@ -2545,7 +2546,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
       req.setSSEAwsKeyManagementParams(generateSSEAwsKeyParams());
       break;
     case SSE_C:
-      if (StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
+      if (isNotBlank(getServerSideEncryptionKey(bucket, getConf()))) {
         //at the moment, only supports copy using the same key
         req.setSSECustomerKey(generateSSECustomerKey());
       }
@@ -2579,7 +2580,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
       );
       break;
     case SSE_C:
-      if (StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
+      if (isNotBlank(getServerSideEncryptionKey(bucket, getConf()))) {
         //at the moment, only supports copy using the same key
         SSECustomerKey customerKey = generateSSECustomerKey();
         copyObjectRequest.setSourceSSECustomerKey(customerKey);
@@ -2596,7 +2597,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
       request.setSSEAwsKeyManagementParams(generateSSEAwsKeyParams());
       break;
     case SSE_C:
-      if (StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
+      if (isNotBlank(getServerSideEncryptionKey(bucket, getConf()))) {
         request.setSSECustomerKey(generateSSECustomerKey());
       }
       break;
@@ -2610,23 +2611,32 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities {
     }
   }
 
+  /**
+   * Create the AWS SDK structure used to configure SSE, based on the
+   * configuration.
+   * @return an instance of the class, which may contain the encryption key
+   */
+  @Retries.OnceExceptionsSwallowed
   private SSEAwsKeyManagementParams generateSSEAwsKeyParams() {
     //Use specified key, otherwise default to default master aws/s3 key by AWS
     SSEAwsKeyManagementParams sseAwsKeyManagementParams =
         new SSEAwsKeyManagementParams();
-    if (StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
-      sseAwsKeyManagementParams =
-        new SSEAwsKeyManagementParams(
-          getServerSideEncryptionKey(getConf())
-        );
+    String encryptionKey = getServerSideEncryptionKey(bucket, getConf());
+    if (isNotBlank(encryptionKey)) {
+      sseAwsKeyManagementParams = new SSEAwsKeyManagementParams(encryptionKey);
     }
     return sseAwsKeyManagementParams;
   }
 
+  /**
+   * Create the SSE-C structure for the AWS SDK.
+   * This will contain a secret extracted from the bucket/configuration.
+   * @return the customer key.
+   */
+  @Retries.OnceExceptionsSwallowed
   private SSECustomerKey generateSSECustomerKey() {
     SSECustomerKey customerKey = new SSECustomerKey(
-        getServerSideEncryptionKey(getConf())
-    );
+        getServerSideEncryptionKey(bucket, getConf()));
     return customerKey;
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 2b64a76..6a01a80 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -118,6 +118,8 @@ public final class S3AUtils {
   private static final String EOF_MESSAGE_IN_XML_PARSER
       = "Failed to sanitize XML document destined for handler class";
 
+  private static final String BUCKET_PATTERN = FS_S3A_BUCKET_PREFIX + "%s.%s";
+
 
   private S3AUtils() {
   }
@@ -540,7 +542,8 @@ public final class S3AUtils {
   /**
    * Create the AWS credentials from the providers, the URI and
    * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration.
-   * @param binding Binding URI, may contain user:pass login details
+   * @param binding Binding URI, may contain user:pass login details;
+   * may be null
    * @param conf filesystem configuration
    * @return a credentials provider list
    * @throws IOException Problems loading the providers (including reading
@@ -560,7 +563,9 @@ public final class S3AUtils {
       credentials.add(InstanceProfileCredentialsProvider.getInstance());
     } else {
       for (Class<?> aClass : awsClasses) {
-        credentials.add(createAWSCredentialProvider(conf, aClass));
+        credentials.add(createAWSCredentialProvider(conf,
+            aClass,
+            binding));
       }
     }
     // make sure the logging message strips out any auth details
@@ -594,8 +599,8 @@ public final class S3AUtils {
    * attempted in order:
    *
    * <ol>
-   * <li>a public constructor accepting
-   *    org.apache.hadoop.conf.Configuration</li>
+   * <li>a public constructor accepting java.net.URI and
+   *     org.apache.hadoop.conf.Configuration</li>
    * <li>a public static method named getInstance that accepts no
    *    arguments and returns an instance of
    *    com.amazonaws.auth.AWSCredentialsProvider, or</li>
@@ -604,11 +609,14 @@ public final class S3AUtils {
    *
    * @param conf configuration
    * @param credClass credential class
+   * @param uri URI of the FS
    * @return the instantiated class
    * @throws IOException on any instantiation failure.
    */
   public static AWSCredentialsProvider createAWSCredentialProvider(
-      Configuration conf, Class<?> credClass) throws IOException {
+      Configuration conf,
+      Class<?> credClass,
+      URI uri) throws IOException {
     AWSCredentialsProvider credentials;
     String className = credClass.getName();
     if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
@@ -620,8 +628,15 @@ public final class S3AUtils {
     LOG.debug("Credential provider class is {}", className);
 
     try {
+      // new X(uri, conf)
+      Constructor cons = getConstructor(credClass, URI.class,
+          Configuration.class);
+      if (cons != null) {
+        credentials = (AWSCredentialsProvider)cons.newInstance(uri, conf);
+        return credentials;
+      }
       // new X(conf)
-      Constructor cons = getConstructor(credClass, Configuration.class);
+      cons = getConstructor(credClass, Configuration.class);
       if (cons != null) {
         credentials = (AWSCredentialsProvider)cons.newInstance(conf);
         return credentials;
@@ -676,7 +691,7 @@ public final class S3AUtils {
    * Return the access key and secret for S3 API use.
    * Credentials may exist in configuration, within credential providers
    * or indicated in the UserInfo of the name URI param.
-   * @param name the URI for which we need the access keys.
+   * @param name the URI for which we need the access keys; may be null
    * @param conf the Configuration object to interrogate for keys.
    * @return AWSAccessKeys
    * @throws IOException problems retrieving passwords from KMS.
@@ -687,12 +702,65 @@ public final class S3AUtils {
         S3xLoginHelper.extractLoginDetailsWithWarnings(name);
     Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders(
         conf, S3AFileSystem.class);
-    String accessKey = getPassword(c, ACCESS_KEY, login.getUser());
-    String secretKey = getPassword(c, SECRET_KEY, login.getPassword());
+    String bucket = name != null ? name.getHost() : "";
+
+    // build the secrets. as getPassword() uses the last arg as
+    // the return value if non-null, the ordering of
+    // login -> bucket -> base is critical
+
+    // get the bucket values
+    String accessKey = lookupPassword(bucket, c, ACCESS_KEY,
+        login.getUser());
+
+    // finally the base
+    String secretKey = lookupPassword(bucket, c, SECRET_KEY,
+        login.getPassword());
+
+    // and override with any per bucket values
     return new S3xLoginHelper.Login(accessKey, secretKey);
   }
 
   /**
+   * Get a password from a configuration, including JCEKS files, handling both
+   * the absolute key and bucket override.
+   * @param bucket bucket or "" if none known
+   * @param conf configuration
+   * @param baseKey base key to look up, e.g "fs.s3a.secret.key"
+   * @param overrideVal override value: if non empty this is used instead of
+   * querying the configuration.
+   * @return a password or "".
+   * @throws IOException on any IO problem
+   * @throws IllegalArgumentException bad arguments
+   */
+  public static String lookupPassword(
+      String bucket,
+      Configuration conf,
+      String baseKey,
+      String overrideVal)
+      throws IOException {
+    String initialVal;
+    Preconditions.checkArgument(baseKey.startsWith(FS_S3A_PREFIX),
+        "%s does not start with $%s", baseKey, FS_S3A_PREFIX);
+    // if there's a bucket, work with it
+    if (StringUtils.isNotEmpty(bucket)) {
+      String subkey = baseKey.substring(FS_S3A_PREFIX.length());
+      String shortBucketKey = String.format(
+          BUCKET_PATTERN, bucket, subkey);
+      String longBucketKey = String.format(
+          BUCKET_PATTERN, bucket, baseKey);
+
+      // set from the long key unless overridden.
+      initialVal = getPassword(conf, longBucketKey, overrideVal);
+      // then override from the short one if it is set
+      initialVal = getPassword(conf, shortBucketKey, initialVal);
+    } else {
+      // no bucket, make the initial value the override value
+      initialVal = overrideVal;
+    }
+    return getPassword(conf, baseKey, initialVal);
+  }
+
+  /**
    * Get a password from a configuration, or, if a value is passed in,
    * pick that up instead.
    * @param conf configuration
@@ -702,10 +770,9 @@ public final class S3AUtils {
    * @return a password or "".
    * @throws IOException on any problem
    */
-  static String getPassword(Configuration conf, String key, String val)
+  private static String getPassword(Configuration conf, String key, String val)
       throws IOException {
-    String defVal = "";
-    return getPassword(conf, key, val, defVal);
+    return getPassword(conf, key, val, "");
   }
 
   /**
@@ -1124,16 +1191,21 @@ public final class S3AUtils {
    * This operation handles the case where the option has been
    * set in the provider or configuration to the option
    * {@code OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY}.
+   * IOExceptions raised during retrieval are swallowed.
+   * @param bucket bucket to query for
    * @param conf configuration to examine
-   * @return the encryption key or null
+   * @return the encryption key or ""
+   * @throws IllegalArgumentException bad arguments.
    */
-  static String getServerSideEncryptionKey(Configuration conf) {
+  static String getServerSideEncryptionKey(String bucket,
+      Configuration conf) {
     try {
-      return lookupPassword(conf, SERVER_SIDE_ENCRYPTION_KEY,
+      return lookupPassword(bucket, conf,
+          SERVER_SIDE_ENCRYPTION_KEY,
           getPassword(conf, OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY,
               null, null));
     } catch (IOException e) {
-      LOG.error("Cannot retrieve SERVER_SIDE_ENCRYPTION_KEY", e);
+      LOG.error("Cannot retrieve " + SERVER_SIDE_ENCRYPTION_KEY, e);
       return "";
     }
   }
@@ -1142,16 +1214,19 @@ public final class S3AUtils {
    * Get the server-side encryption algorithm.
    * This includes validation of the configuration, checking the state of
    * the encryption key given the chosen algorithm.
+   *
+   * @param bucket bucket to query for
    * @param conf configuration to scan
    * @return the encryption mechanism (which will be {@code NONE} unless
    * one is set.
    * @throws IOException on any validation problem.
    */
-  static S3AEncryptionMethods getEncryptionAlgorithm(Configuration conf)
-      throws IOException {
+  static S3AEncryptionMethods getEncryptionAlgorithm(String bucket,
+      Configuration conf) throws IOException {
     S3AEncryptionMethods sse = S3AEncryptionMethods.getMethod(
-        conf.getTrimmed(SERVER_SIDE_ENCRYPTION_ALGORITHM));
-    String sseKey = getServerSideEncryptionKey(conf);
+        lookupPassword(bucket, conf,
+            SERVER_SIDE_ENCRYPTION_ALGORITHM, null));
+    String sseKey = getServerSideEncryptionKey(bucket, conf);
     int sseKeyLen = StringUtils.isBlank(sseKey) ? 0 : sseKey.length();
     String diagnostics = passwordDiagnostics(sseKey, "key");
     switch (sse) {

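As a usage sketch of the lookup order added above (the bucket-qualified key is consulted before falling back to the base `fs.s3a.` key): the class name and secret values below are invented for illustration and assume the patched S3AUtils is on the classpath.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.S3AUtils;

public class PerBucketSecretLookupDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(false);
    // Base secret, used for any bucket without an override.
    conf.set("fs.s3a.secret.key", "base-secret");
    // Per-bucket override for the bucket "nightly".
    conf.set("fs.s3a.bucket.nightly.secret.key", "nightly-secret");

    // Bucket-specific key wins for "nightly"...
    System.out.println(
        S3AUtils.lookupPassword("nightly", conf, "fs.s3a.secret.key", null));
    // ...while other buckets fall back to the base key.
    System.out.println(
        S3AUtils.lookupPassword("other", conf, "fs.s3a.secret.key", null));
  }
}
```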
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
index e7603d9..9abb362 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
@@ -34,9 +34,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 public interface S3ClientFactory {
 
   /**
-   * Creates a new {@link AmazonS3} client.  This method accepts the S3A file
-   * system URI both in raw input form and validated form as separate arguments,
-   * because both values may be useful in logging.
+   * Creates a new {@link AmazonS3} client.
    *
    * @param name raw input S3A file system URI
    * @return S3 client

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java
index ec372bd..9939bb2 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.ProviderUtils;
 
 import java.io.IOException;
+import java.net.URI;
 
 import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY;
@@ -50,12 +51,13 @@ public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider {
   private String secretKey;
   private IOException lookupIOE;
 
-  public SimpleAWSCredentialsProvider(Configuration conf) {
+  public SimpleAWSCredentialsProvider(URI uri, Configuration conf) {
     try {
+      String bucket = uri != null ? uri.getHost() : "";
       Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders(
           conf, S3AFileSystem.class);
-      this.accessKey = S3AUtils.lookupPassword(c, ACCESS_KEY, null);
-      this.secretKey = S3AUtils.lookupPassword(c, SECRET_KEY, null);
+      this.accessKey = S3AUtils.lookupPassword(bucket, c, ACCESS_KEY, null);
+      this.secretKey = S3AUtils.lookupPassword(bucket, c, SECRET_KEY, null);
     } catch (IOException e) {
       lookupIOE = e;
     }
@@ -71,7 +73,7 @@ public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider {
       return new BasicAWSCredentials(accessKey, secretKey);
     }
     throw new CredentialInitializationException(
-        "Access key, secret key or session token is unset");
+        "Access key or secret key is unset");
   }
 
   @Override

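The provider above now exposes a (URI, Configuration) constructor, and createAWSCredentialProvider tries that signature before the plain (Configuration) one, so a provider can key its lookups off the bucket host. A hypothetical third-party provider following the same pattern might look like this (class name and configuration keys are illustrative only):

```java
import java.net.URI;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;

import org.apache.hadoop.conf.Configuration;

/** Illustrative only: a bucket-aware provider using the new constructor. */
public class BucketAwareCredentialsProvider implements AWSCredentialsProvider {
  private final String accessKey;
  private final String secretKey;

  public BucketAwareCredentialsProvider(URI uri, Configuration conf) {
    String bucket = uri != null ? uri.getHost() : "";
    // Read hypothetical per-bucket keys straight from the configuration.
    accessKey = conf.getTrimmed("fs.s3a.bucket." + bucket + ".access.key", "");
    secretKey = conf.getTrimmed("fs.s3a.bucket." + bucket + ".secret.key", "");
  }

  @Override
  public AWSCredentials getCredentials() {
    return new BasicAWSCredentials(accessKey, secretKey);
  }

  @Override
  public void refresh() {
    // Static credentials: nothing to refresh.
  }
}
```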
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java
index 22b23a4..e959908 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java
@@ -24,6 +24,7 @@ import com.amazonaws.auth.AWSCredentials;
 import org.apache.commons.lang.StringUtils;
 
 import java.io.IOException;
+import java.net.URI;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
@@ -31,6 +32,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.ProviderUtils;
 
 import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.lookupPassword;
 
 /**
  * Support session credentials for authenticating with AWS.
@@ -51,12 +53,18 @@ public class TemporaryAWSCredentialsProvider implements AWSCredentialsProvider {
   private IOException lookupIOE;
 
   public TemporaryAWSCredentialsProvider(Configuration conf) {
+    this(null, conf);
+  }
+
+  public TemporaryAWSCredentialsProvider(URI uri, Configuration conf) {
     try {
+      // determine the bucket
+      String bucket = uri != null ? uri.getHost():  "";
       Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders(
           conf, S3AFileSystem.class);
-      this.accessKey = S3AUtils.lookupPassword(c, ACCESS_KEY, null);
-      this.secretKey = S3AUtils.lookupPassword(c, SECRET_KEY, null);
-      this.sessionToken = S3AUtils.lookupPassword(c, SESSION_TOKEN, null);
+      this.accessKey = lookupPassword(bucket, c, ACCESS_KEY, null);
+      this.secretKey = lookupPassword(bucket, c, SECRET_KEY, null);
+      this.sessionToken = lookupPassword(bucket, c, SESSION_TOKEN, null);
     } catch (IOException e) {
       lookupIOE = e;
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
index 42809c8..4b6a77e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.fs.s3a.auth;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.net.URI;
 import java.util.Locale;
 import java.util.concurrent.TimeUnit;
 
@@ -80,12 +81,14 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
    * Instantiate.
    * This calls {@link #getCredentials()} to fail fast on the inner
    * role credential retrieval.
+   * @param uri URI of endpoint.
    * @param conf configuration
    * @throws IOException on IO problems and some parameter checking
    * @throws IllegalArgumentException invalid parameters
    * @throws AWSSecurityTokenServiceException problems getting credentials
    */
-  public AssumedRoleCredentialProvider(Configuration conf) throws IOException {
+  public AssumedRoleCredentialProvider(URI uri, Configuration conf)
+      throws IOException {
 
     arn = conf.getTrimmed(ASSUMED_ROLE_ARN, "");
     if (StringUtils.isEmpty(arn)) {
@@ -101,7 +104,7 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider,
       if (this.getClass().equals(aClass)) {
         throw new IOException(E_FORBIDDEN_PROVIDER);
       }
-      credentials.add(createAWSCredentialProvider(conf, aClass));
+      credentials.add(createAWSCredentialProvider(conf, aClass, uri));
     }
 
     // then the STS binding

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index edf392d..88cab37 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -548,15 +548,33 @@ to keep secrets outside Hadoop configuration files, storing them in encrypted
 files in local or Hadoop filesystems, and including them in requests.
 
 The S3A configuration options with sensitive data
-(`fs.s3a.secret.key`, `fs.s3a.access.key` and `fs.s3a.session.token`) can
+(`fs.s3a.secret.key`, `fs.s3a.access.key`,  `fs.s3a.session.token`
+and `fs.s3a.server-side-encryption.key`) can
 have their data saved to a binary file stored, with the values being read in
 when the S3A filesystem URL is used for data access. The reference to this
-credential provider is all that is passed as a direct configuration option.
+credential provider is then declared in the Hadoop configuration.
 
 For additional reading on the Hadoop Credential Provider API see:
 [Credential Provider API](../../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
 
 
+The following configuration options can be stored in Hadoop Credential Provider
+stores.
+
+```
+fs.s3a.access.key
+fs.s3a.secret.key
+fs.s3a.session.token
+fs.s3a.server-side-encryption.key
+fs.s3a.server-side-encryption-algorithm
+```
+
+The first three are for authentication; the final two for
+[encryption](./encryption.html). Of the latter, only the encryption key can
+be considered "sensitive". However, being able to include the algorithm in
+the credentials allows for a JCEKS file to contain all the options needed
+to encrypt new data written to S3.
+
 ### Step 1: Create a credential file
 
 A credential file can be created on any Hadoop filesystem; when creating one on HDFS or
@@ -565,7 +583,6 @@ private to the reader —though as directory permissions are not touched,
 users should verify that the directory containing the file is readable only by
 the current user.
 
-
 ```bash
 hadoop credential create fs.s3a.access.key -value 123 \
     -provider jceks://hdfs@nn1.example.com:9001/user/backup/s3.jceks
@@ -621,9 +638,12 @@ over that of the `hadoop.security` list (i.e. they are prepended to the common l
 </property>
 ```
 
-Supporting a separate list in an `fs.s3a.` prefix permits per-bucket configuration
-of credential files.
-
+This was added to support binding different credential providers on a
+per-bucket basis, without adding alternative secrets in the credential list.
+However, some applications (e.g. Hive) prevent the list of credential providers
+from being dynamically updated by users. As per-bucket secrets are now supported,
+it is better to include per-bucket keys in JCEKS files and other sources
+of credentials.
 
 ### Using secrets from credential providers
 
@@ -1133,16 +1153,28 @@ Finally, the public `s3a://landsat-pds/` bucket can be accessed anonymously:
 
 ### Customizing S3A secrets held in credential files
 
-Although most properties are automatically propagated from their
-`fs.s3a.bucket.`-prefixed custom entry to that of the base `fs.s3a.` option
-supporting secrets kept in Hadoop credential files is slightly more complex.
-This is because the property values are kept in these files, and cannot be
-dynamically patched.
 
-Instead, callers need to create different configuration files for each
-bucket, setting the base secrets (`fs.s3a.access.key`, etc),
-then declare the path to the appropriate credential file in
-a bucket-specific version of the property `fs.s3a.security.credential.provider.path`.
+Secrets in JCEKS files or provided by other Hadoop credential providers
+can also be configured on a per-bucket basis. The S3A client will
+look for the per-bucket secrets before the base `fs.s3a.` options.
+
+
+Consider a JCEKS file with eight keys:
+
+```
+fs.s3a.access.key
+fs.s3a.secret.key
+fs.s3a.server-side-encryption-algorithm
+fs.s3a.bucket.nightly.access.key
+fs.s3a.bucket.nightly.secret.key
+fs.s3a.bucket.nightly.session.token
+fs.s3a.bucket.nightly.server-side-encryption.key
+fs.s3a.bucket.nightly.server-side-encryption-algorithm
+```
+
+When accessing the bucket `s3a://nightly/`, the per-bucket configuration
+options for that bucket will be used: here the access keys and session token,
+along with the encryption algorithm and key.
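
As a rough sketch of the lookup order implied above: a bucket-specific key such as
`fs.s3a.bucket.nightly.secret.key` is consulted before the base `fs.s3a.secret.key`.
This is illustrative only, not the actual `S3AUtils.lookupPassword()` implementation;
the helper class, method name and null handling here are assumptions.

```java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;

/** Illustrative sketch: resolve an S3A secret with per-bucket precedence. */
public final class PerBucketSecretSketch {

  /**
   * Look up an option such as "fs.s3a.secret.key" for a bucket,
   * preferring "fs.s3a.bucket.BUCKET.secret.key" when it is defined.
   */
  static String resolveSecret(Configuration conf, String bucket, String baseKey)
      throws IOException {
    // e.g. fs.s3a.secret.key -> fs.s3a.bucket.nightly.secret.key
    String bucketKey = baseKey.replaceFirst("^fs\\.s3a\\.",
        "fs.s3a.bucket." + bucket + ".");
    // getPassword() consults any credential providers before falling back
    // to a value set directly in the configuration.
    char[] value = conf.getPassword(bucketKey);
    if (value == null) {
      value = conf.getPassword(baseKey);
    }
    return value == null ? null : new String(value);
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    System.out.println(resolveSecret(conf, "nightly", "fs.s3a.secret.key"));
  }
}
```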
 
 
 ###  <a name="per_bucket_endpoints"></a>Using Per-Bucket Configuration to access data round the world

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java
index b9b03ba..20baacc 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.fs.s3a;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
-import java.util.concurrent.Callable;
 
 import org.junit.Assert;
 import org.junit.Rule;
@@ -41,9 +40,14 @@ import static org.apache.hadoop.test.LambdaTestUtils.*;
 
 /**
  * Test SSE setup operations and errors raised.
+ * Tests related to secret providers and AWS credentials are also
+ * included, as they share some common setup operations.
  */
 public class TestSSEConfiguration extends Assert {
 
+  /** Bucket to use for per-bucket options. */
+  public static final String BUCKET = "dataset-1";
+
   @Rule
   public Timeout testTimeout = new Timeout(
       S3ATestConstants.S3A_TEST_TIMEOUT
@@ -54,12 +58,12 @@ public class TestSSEConfiguration extends Assert {
 
   @Test
   public void testSSECNoKey() throws Throwable {
-    assertExceptionTextEquals(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), null);
+    assertGetAlgorithmFails(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), null);
   }
 
   @Test
   public void testSSECBlankKey() throws Throwable {
-    assertExceptionTextEquals(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), "");
+    assertGetAlgorithmFails(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), "");
   }
 
   @Test
@@ -74,74 +78,67 @@ public class TestSSEConfiguration extends Assert {
 
   @Test
   public void testKMSGoodOldOptionName() throws Throwable {
-    Configuration conf = new Configuration(false);
+    Configuration conf = emptyConf();
     conf.set(SERVER_SIDE_ENCRYPTION_ALGORITHM, SSE_KMS.getMethod());
     conf.set(OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, "kmskeyID");
     // verify key round trip
-    assertEquals("kmskeyID", getServerSideEncryptionKey(conf));
+    assertEquals("kmskeyID", getServerSideEncryptionKey(BUCKET, conf));
     // and that KMS lookup finds it
-    assertEquals(SSE_KMS, getEncryptionAlgorithm(conf));
+    assertEquals(SSE_KMS, getEncryptionAlgorithm(BUCKET, conf));
   }
 
   @Test
   public void testAESKeySet() throws Throwable {
-    assertExceptionTextEquals(SSE_S3_WITH_KEY_ERROR,
+    assertGetAlgorithmFails(SSE_S3_WITH_KEY_ERROR,
         SSE_S3.getMethod(), "setkey");
   }
 
   @Test
-  public void testSSEEmptyKey() throws Throwable {
+  public void testSSEEmptyKey() {
     // test the internal logic of the test setup code
     Configuration c = buildConf(SSE_C.getMethod(), "");
-    assertEquals("", getServerSideEncryptionKey(c));
+    assertEquals("", getServerSideEncryptionKey(BUCKET, c));
   }
 
   @Test
   public void testSSEKeyNull() throws Throwable {
     // test the internal logic of the test setup code
     final Configuration c = buildConf(SSE_C.getMethod(), null);
-    assertNull("", getServerSideEncryptionKey(c));
+    assertEquals("", getServerSideEncryptionKey(BUCKET, c));
 
     intercept(IOException.class, SSE_C_NO_KEY_ERROR,
-        new Callable<S3AEncryptionMethods>() {
-          @Override
-          public S3AEncryptionMethods call() throws Exception {
-            return getEncryptionAlgorithm(c);
-          }
-        });
+        () -> getEncryptionAlgorithm(BUCKET, c));
   }
 
   @Test
   public void testSSEKeyFromCredentialProvider() throws Exception {
     // set up conf to have a cred provider
-    final Configuration conf = new Configuration();
-    addFileProvider(conf);
+    final Configuration conf = confWithProvider();
     String key = "provisioned";
-    provisionSSEKey(conf, SERVER_SIDE_ENCRYPTION_KEY, key);
+    setProviderOption(conf, SERVER_SIDE_ENCRYPTION_KEY, key);
     // let's set the password in config and ensure that it uses the credential
     // provider provisioned value instead.
     conf.set(SERVER_SIDE_ENCRYPTION_KEY, "keyInConfObject");
 
-    String sseKey = getServerSideEncryptionKey(conf);
+    String sseKey = getServerSideEncryptionKey(BUCKET, conf);
     assertNotNull("Proxy password should not retrun null.", sseKey);
     assertEquals("Proxy password override did NOT work.", key, sseKey);
   }
 
   /**
-   * Very that the old key is picked up via the properties
+   * Verify that the old key is picked up via the properties.
    * @throws Exception failure
    */
   @Test
   public void testOldKeyFromCredentialProvider() throws Exception {
     // set up conf to have a cred provider
-    final Configuration conf = new Configuration();
-    addFileProvider(conf);
+    final Configuration conf = confWithProvider();
     String key = "provisioned";
-    provisionSSEKey(conf, OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, key);
+    setProviderOption(conf, OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, key);
     // let's set the password in config and ensure that it uses the credential
     // provider provisioned value instead.
     //conf.set(OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, "oldKeyInConf");
-    String sseKey = getServerSideEncryptionKey(conf);
+    String sseKey = getServerSideEncryptionKey(BUCKET, conf);
     assertNotNull("Proxy password should not retrun null.", sseKey);
     assertEquals("Proxy password override did NOT work.", key, sseKey);
   }
@@ -161,38 +158,35 @@ public class TestSSEConfiguration extends Assert {
   }
 
   /**
-   * Set the SSE Key via the provision API, not the config itself.
+   * Set an option in the configuration via the
+   * {@link CredentialProviderFactory} APIs.
    * @param conf config
    * @param option option name
-   * @param key key to set
+   * @param value value to set option to.
    * @throws Exception failure
    */
-  void provisionSSEKey(final Configuration conf,
-      String option, String key) throws Exception {
+  void setProviderOption(final Configuration conf,
+      String option, String value) throws Exception {
     // add our password to the provider
     final CredentialProvider provider =
         CredentialProviderFactory.getProviders(conf).get(0);
     provider.createCredentialEntry(option,
-        key.toCharArray());
+        value.toCharArray());
     provider.flush();
   }
 
   /**
-   * Assert that the exception text from a config contains the expected string
-   * @param expected expected substring
+   * Assert that the exception text from {@link #getAlgorithm(String, String)}
+   * is as expected.
+   * @param expected expected substring in error
    * @param alg algorithm to ask for
    * @param key optional key value
    * @throws Exception anything else which gets raised
    */
-  public void assertExceptionTextEquals(String expected,
+  public void assertGetAlgorithmFails(String expected,
       final String alg, final String key) throws Exception {
     intercept(IOException.class, expected,
-        new Callable<S3AEncryptionMethods>() {
-          @Override
-          public S3AEncryptionMethods call() throws Exception {
-            return getAlgorithm(alg, key);
-          }
-        });
+        () -> getAlgorithm(alg, key));
   }
 
   private S3AEncryptionMethods getAlgorithm(S3AEncryptionMethods algorithm,
@@ -203,11 +197,18 @@ public class TestSSEConfiguration extends Assert {
 
   private S3AEncryptionMethods getAlgorithm(String algorithm, String key)
       throws IOException {
-    return getEncryptionAlgorithm(buildConf(algorithm, key));
+    return getEncryptionAlgorithm(BUCKET, buildConf(algorithm, key));
   }
 
+  /**
+   * Build a new configuration with the given S3-SSE algorithm
+   * and key.
+   * @param algorithm  algorithm to use, may be null
+   * @param key key, may be null
+   * @return the new config.
+   */
   private Configuration buildConf(String algorithm, String key) {
-    Configuration conf = new Configuration(false);
+    Configuration conf = emptyConf();
     if (algorithm != null) {
       conf.set(SERVER_SIDE_ENCRYPTION_ALGORITHM, algorithm);
     } else {
@@ -220,4 +221,92 @@ public class TestSSEConfiguration extends Assert {
     }
     return conf;
   }
+
+  /**
+   * Create an empty conf: no -default or -site values.
+   * @return an empty configuration
+   */
+  private Configuration emptyConf() {
+    return new Configuration(false);
+  }
+
+  /**
+   * Create a configuration with no defaults, bound to a file
+   * provider, so that
+   * {@link #setProviderOption(Configuration, String, String)}
+   * can be used to set a secret.
+   * @return the configuration
+   * @throws Exception any failure
+   */
+  private Configuration confWithProvider() throws Exception {
+    final Configuration conf = emptyConf();
+    addFileProvider(conf);
+    return conf;
+  }
+
+
+  private static final String SECRET = "*secret*";
+
+  private static final String BUCKET_PATTERN = FS_S3A_BUCKET_PREFIX + "%s.%s";
+
+  @Test
+  public void testGetPasswordFromConf() throws Throwable {
+    final Configuration conf = emptyConf();
+    conf.set(SECRET_KEY, SECRET);
+    assertEquals(SECRET, lookupPassword(conf, SECRET_KEY, ""));
+    assertEquals(SECRET, lookupPassword(conf, SECRET_KEY, "defVal"));
+  }
+
+  @Test
+  public void testGetPasswordFromProvider() throws Throwable {
+    final Configuration conf = confWithProvider();
+    setProviderOption(conf, SECRET_KEY, SECRET);
+    assertEquals(SECRET, lookupPassword(conf, SECRET_KEY, ""));
+    assertSecretKeyEquals(conf, null, SECRET, "");
+    assertSecretKeyEquals(conf, null, "overidden", "overidden");
+  }
+
+  @Test
+  public void testGetBucketPasswordFromProvider() throws Throwable {
+    final Configuration conf = confWithProvider();
+    URI bucketURI = new URI("s3a://"+ BUCKET +"/");
+    setProviderOption(conf, SECRET_KEY, "unbucketed");
+
+    String bucketedKey = String.format(BUCKET_PATTERN, BUCKET, SECRET_KEY);
+    setProviderOption(conf, bucketedKey, SECRET);
+    String overrideVal;
+    overrideVal = "";
+    assertSecretKeyEquals(conf, BUCKET, SECRET, overrideVal);
+    assertSecretKeyEquals(conf, bucketURI.getHost(), SECRET, "");
+    assertSecretKeyEquals(conf, bucketURI.getHost(), "overidden", "overidden");
+  }
+
+  /**
+   * Assert that a secret key is as expected.
+   * @param conf configuration to examine
+   * @param bucket bucket name
+   * @param expected expected value
+   * @param overrideVal override value in {@code S3AUtils.lookupPassword()}
+   * @throws IOException IO problem
+   */
+  private void assertSecretKeyEquals(Configuration conf,
+      String bucket,
+      String expected, String overrideVal) throws IOException {
+    assertEquals(expected,
+        S3AUtils.lookupPassword(bucket, conf, SECRET_KEY, overrideVal));
+  }
+
+  @Test
+  public void testGetBucketPasswordFromProviderShort() throws Throwable {
+    final Configuration conf = confWithProvider();
+    URI bucketURI = new URI("s3a://"+ BUCKET +"/");
+    setProviderOption(conf, SECRET_KEY, "unbucketed");
+
+    String bucketedKey = String.format(BUCKET_PATTERN, BUCKET, "secret.key");
+    setProviderOption(conf, bucketedKey, SECRET);
+    assertSecretKeyEquals(conf, BUCKET, SECRET, "");
+    assertSecretKeyEquals(conf, bucketURI.getHost(), SECRET, "");
+    assertSecretKeyEquals(conf, bucketURI.getHost(), "overidden", "overidden");
+  }
+
 }
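
For reference, the `confWithProvider()` and `setProviderOption()` helpers in the test
above reduce to the pattern below: bind a configuration to a keystore-backed credential
provider, write an entry, then read it back through `Configuration.getPassword()`.
This is a sketch under assumptions; the local keystore path and the secret values are
placeholders.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.alias.CredentialProvider;
import org.apache.hadoop.security.alias.CredentialProviderFactory;

public final class JceksProvisionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(false);
    // Point the configuration at a local JCEKS keystore (path is illustrative).
    conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH,
        "jceks://file/tmp/s3a-secrets.jceks");

    // Write a per-bucket secret into the keystore and persist it.
    CredentialProvider provider =
        CredentialProviderFactory.getProviders(conf).get(0);
    provider.createCredentialEntry("fs.s3a.bucket.dataset-1.secret.key",
        "*secret*".toCharArray());
    provider.flush();

    // Read it back: getPassword() checks credential providers first,
    // then any value set directly in the configuration.
    char[] secret = conf.getPassword("fs.s3a.bucket.dataset-1.secret.key");
    System.out.println(secret == null ? "missing" : new String(secret));
  }
}
```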

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7ac88244/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
index 08171b0..c6985b0 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.fs.s3a.auth;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.net.URI;
 import java.nio.file.AccessDeniedException;
 import java.util.ArrayList;
 import java.util.List;
@@ -43,6 +44,7 @@ import org.apache.hadoop.fs.s3a.AWSBadRequestException;
 import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
 import org.apache.hadoop.fs.s3a.MultipartUtils;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ATestConstants;
 import org.apache.hadoop.fs.s3a.S3AUtils;
 import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
 import org.apache.hadoop.fs.s3a.commit.CommitConstants;
@@ -74,6 +76,11 @@ public class ITestAssumeRole extends AbstractS3ATestBase {
   private static final Path ROOT = new Path("/");
 
   /**
+   * test URI, built in setup.
+   */
+  private URI uri;
+
+  /**
    * A role FS; if non-null it is closed in teardown.
    */
   private S3AFileSystem roleFS;
@@ -82,6 +89,7 @@ public class ITestAssumeRole extends AbstractS3ATestBase {
   public void setup() throws Exception {
     super.setup();
     assumeRoleTests();
+    uri = new URI(S3ATestConstants.DEFAULT_CSVTEST_FILE);
   }
 
   @Override
@@ -128,7 +136,7 @@ public class ITestAssumeRole extends AbstractS3ATestBase {
     conf.set(ASSUMED_ROLE_SESSION_DURATION, "45m");
     bindRolePolicy(conf, RESTRICTED_POLICY);
     try (AssumedRoleCredentialProvider provider
-             = new AssumedRoleCredentialProvider(conf)) {
+             = new AssumedRoleCredentialProvider(uri, conf)) {
       LOG.info("Provider is {}", provider);
       AWSCredentials credentials = provider.getCredentials();
       assertNotNull("Null credentials from " + provider, credentials);
@@ -141,7 +149,7 @@ public class ITestAssumeRole extends AbstractS3ATestBase {
     conf.set(ASSUMED_ROLE_ARN, ROLE_ARN_EXAMPLE);
     interceptClosing(AWSSecurityTokenServiceException.class,
         E_BAD_ROLE,
-        () -> new AssumedRoleCredentialProvider(conf));
+        () -> new AssumedRoleCredentialProvider(uri, conf));
   }
 
   @Test
@@ -264,7 +272,7 @@ public class ITestAssumeRole extends AbstractS3ATestBase {
     conf.set(ASSUMED_ROLE_ARN, "");
     interceptClosing(IOException.class,
         AssumedRoleCredentialProvider.E_NO_ROLE,
-        () -> new AssumedRoleCredentialProvider(conf));
+        () -> new AssumedRoleCredentialProvider(uri, conf));
   }
 
   @Test
@@ -273,7 +281,7 @@ public class ITestAssumeRole extends AbstractS3ATestBase {
     Configuration conf = new Configuration();
     conf.set(ASSUMED_ROLE_SESSION_DURATION, "30s");
     interceptClosing(IllegalArgumentException.class, "",
-        () -> new AssumedRoleCredentialProvider(conf));
+        () -> new AssumedRoleCredentialProvider(uri, conf));
   }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


[07/21] hadoop git commit: HADOOP-13972. ADLS to support per-store configuration. Contributed by Sharad Sonker.

Posted by ha...@apache.org.
HADOOP-13972. ADLS to support per-store configuration.
Contributed by Sharad Sonker.

(cherry picked from commit 050f5287b79324b7f6231b879c0bfc608203b980)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/481d79fe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/481d79fe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/481d79fe

Branch: refs/heads/HDFS-12996
Commit: 481d79fedc48942654dab08e23e71e80c8eb2aca
Parents: 9a013b2
Author: Steve Loughran <st...@apache.org>
Authored: Thu Feb 15 16:25:55 2018 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Feb 15 16:27:31 2018 +0000

----------------------------------------------------------------------
 .../org/apache/hadoop/fs/adl/AdlConfKeys.java   |   5 +
 .../org/apache/hadoop/fs/adl/AdlFileSystem.java |  81 +++++++++-
 .../src/site/markdown/index.md                  |  37 +++++
 .../fs/adl/TestValidateConfiguration.java       | 152 +++++++++++++++----
 .../hadoop/fs/adl/common/Parallelized.java      |   2 +-
 5 files changed, 239 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/481d79fe/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
index 790902c..e3a4ad6 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
@@ -33,6 +33,11 @@ public final class AdlConfKeys {
   public static final String AZURE_AD_REFRESH_URL_KEY =
       "fs.adl.oauth2.refresh.url";
 
+  public static final String AZURE_AD_ACCOUNT_PREFIX =
+          "fs.adl.account.";
+  public static final String AZURE_AD_PREFIX =
+          "fs.adl.";
+
   // optional when provider type is refresh or client id.
   public static final String AZURE_AD_TOKEN_PROVIDER_CLASS_KEY =
       "fs.adl.oauth2.access.token.provider";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/481d79fe/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
index a496595..9f54a36 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
@@ -24,8 +24,10 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.Map;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.microsoft.azure.datalake.store.ADLStoreClient;
 import com.microsoft.azure.datalake.store.ADLStoreOptions;
 import com.microsoft.azure.datalake.store.DirectoryEntry;
@@ -37,6 +39,8 @@ import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
 import com.microsoft.azure.datalake.store.oauth2.DeviceCodeTokenProvider;
 import com.microsoft.azure.datalake.store.oauth2.MsiTokenProvider;
 import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -74,6 +78,8 @@ import static org.apache.hadoop.fs.adl.AdlConfKeys.*;
 @InterfaceAudience.Public
 @InterfaceStability.Evolving
 public class AdlFileSystem extends FileSystem {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(AdlFileSystem.class);
   public static final String SCHEME = "adl";
   static final int DEFAULT_PORT = 443;
   private URI uri;
@@ -115,12 +121,19 @@ public class AdlFileSystem extends FileSystem {
   /**
    * Called after a new FileSystem instance is constructed.
    *
-   * @param storeUri a uri whose authority section names the host, port, etc.
-   *                 for this FileSystem
-   * @param conf     the configuration
+   * @param storeUri      a uri whose authority section names the host, port,
+   *                      etc. for this FileSystem
+   * @param originalConf  the configuration to use for the FS. The account-
+   *                      specific options are patched over the base ones
+   *                      before any use is made of the config.
    */
   @Override
-  public void initialize(URI storeUri, Configuration conf) throws IOException {
+  public void initialize(URI storeUri, Configuration originalConf)
+      throws IOException {
+    String hostname = storeUri.getHost();
+    String accountName = getAccountNameFromFQDN(hostname);
+    Configuration conf = propagateAccountOptions(originalConf, accountName);
+
     super.initialize(storeUri, conf);
     this.setConf(conf);
     this.uri = URI
@@ -144,7 +157,6 @@ public class AdlFileSystem extends FileSystem {
 
     String accountFQDN = null;
     String mountPoint = null;
-    String hostname = storeUri.getHost();
     if (!hostname.contains(".") && !hostname.equalsIgnoreCase(
         "localhost")) {  // this is a symbolic name. Resolve it.
       String hostNameProperty = "dfs.adls." + hostname + ".hostname";
@@ -985,4 +997,63 @@ public class AdlFileSystem extends FileSystem {
     oidOrUpn = enableUPN ? UserGroupRepresentation.UPN :
         UserGroupRepresentation.OID;
   }
+
+  /**
+   * Gets ADL account name from ADL FQDN.
+   * @param accountFQDN ADL account fqdn
+   * @return ADL account name
+   */
+  public static String getAccountNameFromFQDN(String accountFQDN) {
+    return accountFQDN.contains(".")
+            ? accountFQDN.substring(0, accountFQDN.indexOf("."))
+            : accountFQDN;
+  }
+
+  /**
+   * Propagates account-specific settings into generic ADL configuration keys.
+   * This is done by propagating the values of the form
+   * {@code fs.adl.account.${account_name}.key} to
+   * {@code fs.adl.key}, for all values of "key"
+   *
+   * The source of the updated property is set to the key name of the account
+   * property, to aid in diagnostics of where things came from.
+   *
+   * Returns a new configuration rather than patching the source:
+   * the same conf can then be reused for different filesystems, without
+   * the original values being updated.
+   *
+   * @param source Source Configuration object
+   * @param accountName account name. Must not be empty
+   * @return a (potentially) patched clone of the original
+   */
+  public static Configuration propagateAccountOptions(Configuration source,
+      String accountName) {
+
+    Preconditions.checkArgument(StringUtils.isNotEmpty(accountName),
+        "accountName");
+    final String accountPrefix = AZURE_AD_ACCOUNT_PREFIX + accountName +'.';
+    LOG.debug("Propagating entries under {}", accountPrefix);
+    final Configuration dest = new Configuration(source);
+    for (Map.Entry<String, String> entry : source) {
+      final String key = entry.getKey();
+      // get the (unexpanded) value.
+      final String value = entry.getValue();
+      if (!key.startsWith(accountPrefix) || accountPrefix.equals(key)) {
+        continue;
+      }
+      // there's an account prefix, so strip it
+      final String stripped = key.substring(accountPrefix.length());
+
+      // propagate the value, building a new origin field.
+      // to track overwrites, the generic key is overwritten even if it
+      // already matches the new one.
+      String origin = "[" + StringUtils.join(
+              source.getPropertySources(key), ", ") +"]";
+      final String generic = AZURE_AD_PREFIX + stripped;
+      LOG.debug("Updating {} from {}", generic, origin);
+      dest.set(generic, value, key + " via " + origin);
+    }
+    return dest;
+  }
 }
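
A minimal sketch of how the new `propagateAccountOptions()` call behaves, in line with
the unit tests added further down; the account names and values here are invented for
illustration.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.AdlFileSystem;

public final class AdlAccountPropagationSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("fs.adl.oauth2.client.id", "defaultClientId");
    conf.set("fs.adl.account.myprivateaccount.oauth2.client.id",
        "privateClientId");

    // Account-specific keys override the generic fs.adl.* values
    // in the returned (cloned) configuration.
    Configuration patched =
        AdlFileSystem.propagateAccountOptions(conf, "myprivateaccount");
    System.out.println(patched.get("fs.adl.oauth2.client.id"));  // privateClientId

    // Any other account falls back to the generic value; the source
    // configuration itself is never modified.
    Configuration other =
        AdlFileSystem.propagateAccountOptions(conf, "someotheraccount");
    System.out.println(other.get("fs.adl.oauth2.client.id"));    // defaultClientId
  }
}
```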

http://git-wip-us.apache.org/repos/asf/hadoop/blob/481d79fe/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
index d2b6edf..ad2a983 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
+++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
@@ -36,6 +36,7 @@ This support comes via the JAR file `azure-datalake-store.jar`.
 * Tested for scale.
 * API `setOwner()`, `setAcl`, `removeAclEntries()`, `modifyAclEntries()` accepts UPN or OID
   (Object ID) as user and group names.
+* Supports per-account configuration.
 
 ## Limitations
 
@@ -328,6 +329,42 @@ Add the following properties to `core-site.xml`
   </description>
 </property>
 ```
+## Configurations for different ADL accounts
+Different ADL accounts can be accessed with different ADL client configurations.
+This also allows for different login details.
+
+1. All `fs.adl` options can be set on a per-account basis.
+1. The account-specific option is set by replacing the `fs.adl.` prefix on an option
+with `fs.adl.account.ACCOUNTNAME.`, where `ACCOUNTNAME` is the name of the account.
+1. When connecting to an account, all options explicitly set for that account
+override the base `fs.adl.` values.
+
+As an example, a configuration could declare base options for the public account
+`adl://<some-public-account>.azuredatalakestore.net/` and account-specific options
+for a private account `adl://myprivateaccount.azuredatalakestore.net/`:
+
+```xml
+<property>
+  <name>fs.adl.oauth2.client.id</name>
+  <value>CLIENTID</value>
+</property>
+
+<property>
+  <name>fs.adl.oauth2.credential</name>
+  <value>CREDENTIAL</value>
+</property>
+
+<property>
+  <name>fs.adl.account.myprivateaccount.oauth2.client.id</name>
+  <value>CLIENTID1</value>
+</property>
+
+<property>
+  <name>fs.adl.account.myprivateaccount.oauth2.credential</name>
+  <value>CREDENTIAL1</value>
+</property>
+```
+
 ## Testing the azure-datalake-store Module
 The `hadoop-azure` module includes a full suite of unit tests.
 Most of the tests will run without additional configuration by running `mvn test`.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/481d79fe/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java
index 3d51b42..0308a69 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.fs.adl;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.test.GenericTestUtils;
-import org.junit.Assert;
 import org.junit.Test;
 
 import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
@@ -58,6 +57,8 @@ import static org.apache.hadoop.fs.adl.AdlConfKeys
     .TOKEN_PROVIDER_TYPE_REFRESH_TOKEN;
 import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
@@ -70,50 +71,48 @@ public class TestValidateConfiguration {
 
   @Test
   public void validateConfigurationKeys() {
-    Assert
-        .assertEquals("fs.adl.oauth2.refresh.url", AZURE_AD_REFRESH_URL_KEY);
-    Assert.assertEquals("fs.adl.oauth2.access.token.provider",
+    assertEquals("fs.adl.oauth2.refresh.url", AZURE_AD_REFRESH_URL_KEY);
+    assertEquals("fs.adl.oauth2.access.token.provider",
         AZURE_AD_TOKEN_PROVIDER_CLASS_KEY);
-    Assert.assertEquals("fs.adl.oauth2.client.id", AZURE_AD_CLIENT_ID_KEY);
-    Assert.assertEquals("fs.adl.oauth2.refresh.token",
+    assertEquals("fs.adl.oauth2.client.id", AZURE_AD_CLIENT_ID_KEY);
+    assertEquals("fs.adl.oauth2.refresh.token",
         AZURE_AD_REFRESH_TOKEN_KEY);
-    Assert
-        .assertEquals("fs.adl.oauth2.credential", AZURE_AD_CLIENT_SECRET_KEY);
-    Assert.assertEquals("adl.debug.override.localuserasfileowner",
+    assertEquals("fs.adl.oauth2.credential", AZURE_AD_CLIENT_SECRET_KEY);
+    assertEquals("adl.debug.override.localuserasfileowner",
         ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER);
 
-    Assert.assertEquals("fs.adl.oauth2.access.token.provider.type",
+    assertEquals("fs.adl.oauth2.access.token.provider.type",
         AZURE_AD_TOKEN_PROVIDER_TYPE_KEY);
 
-    Assert.assertEquals("adl.feature.client.cache.readahead",
+    assertEquals("adl.feature.client.cache.readahead",
         READ_AHEAD_BUFFER_SIZE_KEY);
 
-    Assert.assertEquals("adl.feature.client.cache.drop.behind.writes",
+    assertEquals("adl.feature.client.cache.drop.behind.writes",
         WRITE_BUFFER_SIZE_KEY);
 
-    Assert.assertEquals("RefreshToken", TOKEN_PROVIDER_TYPE_REFRESH_TOKEN);
+    assertEquals("RefreshToken", TOKEN_PROVIDER_TYPE_REFRESH_TOKEN);
 
-    Assert.assertEquals("ClientCredential", TOKEN_PROVIDER_TYPE_CLIENT_CRED);
+    assertEquals("ClientCredential", TOKEN_PROVIDER_TYPE_CLIENT_CRED);
 
-    Assert.assertEquals("adl.enable.client.latency.tracker",
+    assertEquals("adl.enable.client.latency.tracker",
         LATENCY_TRACKER_KEY);
 
-    Assert.assertEquals(true, LATENCY_TRACKER_DEFAULT);
+    assertEquals(true, LATENCY_TRACKER_DEFAULT);
 
-    Assert.assertEquals(true, ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT);
+    assertEquals(true, ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT);
 
-    Assert.assertEquals("adl.feature.experiment.positional.read.enable",
+    assertEquals("adl.feature.experiment.positional.read.enable",
         ADL_EXPERIMENT_POSITIONAL_READ_KEY);
 
-    Assert.assertEquals(1, ADL_REPLICATION_FACTOR);
-    Assert.assertEquals(256 * 1024 * 1024, ADL_BLOCK_SIZE);
-    Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
-    Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE);
-    Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE);
+    assertEquals(1, ADL_REPLICATION_FACTOR);
+    assertEquals(256 * 1024 * 1024, ADL_BLOCK_SIZE);
+    assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
+    assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE);
+    assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE);
 
-    Assert.assertEquals("adl.feature.ownerandgroup.enableupn",
+    assertEquals("adl.feature.ownerandgroup.enableupn",
         ADL_ENABLEUPN_FOR_OWNERGROUP_KEY);
-    Assert.assertEquals(false,
+    assertEquals(false,
         ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT);
   }
 
@@ -152,6 +151,95 @@ public class TestValidateConfiguration {
     assertDeprecatedKeys(conf);
   }
 
+  @Test
+  public void testGetAccountNameFromFQDN() {
+    assertEquals("dummy", AdlFileSystem.
+        getAccountNameFromFQDN("dummy.azuredatalakestore.net"));
+    assertEquals("localhost", AdlFileSystem.
+        getAccountNameFromFQDN("localhost"));
+  }
+
+  @Test
+  public void testPropagateAccountOptionsDefault() {
+    Configuration conf = new Configuration(false);
+    conf.set("fs.adl.oauth2.client.id", "defaultClientId");
+    conf.set("fs.adl.oauth2.credential", "defaultCredential");
+    conf.set("some.other.config", "someValue");
+    Configuration propagatedConf =
+        AdlFileSystem.propagateAccountOptions(conf, "dummy");
+    assertEquals("defaultClientId",
+        propagatedConf.get(AZURE_AD_CLIENT_ID_KEY));
+    assertEquals("defaultCredential",
+        propagatedConf.get(AZURE_AD_CLIENT_SECRET_KEY));
+    assertEquals("someValue",
+        propagatedConf.get("some.other.config"));
+  }
+
+  @Test
+  public void testPropagateAccountOptionsSpecified() {
+    Configuration conf = new Configuration(false);
+    conf.set("fs.adl.account.dummy.oauth2.client.id", "dummyClientId");
+    conf.set("fs.adl.account.dummy.oauth2.credential", "dummyCredential");
+    conf.set("some.other.config", "someValue");
+
+    Configuration propagatedConf =
+        AdlFileSystem.propagateAccountOptions(conf, "dummy");
+    assertEquals("dummyClientId",
+        propagatedConf.get(AZURE_AD_CLIENT_ID_KEY));
+    assertEquals("dummyCredential",
+        propagatedConf.get(AZURE_AD_CLIENT_SECRET_KEY));
+    assertEquals("someValue",
+        propagatedConf.get("some.other.config"));
+
+    propagatedConf =
+        AdlFileSystem.propagateAccountOptions(conf, "anotherDummy");
+    assertEquals(null,
+        propagatedConf.get(AZURE_AD_CLIENT_ID_KEY));
+    assertEquals(null,
+        propagatedConf.get(AZURE_AD_CLIENT_SECRET_KEY));
+    assertEquals("someValue",
+        propagatedConf.get("some.other.config"));
+  }
+
+  @Test
+  public void testPropagateAccountOptionsAll() {
+    Configuration conf = new Configuration(false);
+    conf.set("fs.adl.oauth2.client.id", "defaultClientId");
+    conf.set("fs.adl.oauth2.credential", "defaultCredential");
+    conf.set("some.other.config", "someValue");
+    conf.set("fs.adl.account.dummy1.oauth2.client.id", "dummyClientId1");
+    conf.set("fs.adl.account.dummy1.oauth2.credential", "dummyCredential1");
+    conf.set("fs.adl.account.dummy2.oauth2.client.id", "dummyClientId2");
+    conf.set("fs.adl.account.dummy2.oauth2.credential", "dummyCredential2");
+
+    Configuration propagatedConf =
+        AdlFileSystem.propagateAccountOptions(conf, "dummy1");
+    assertEquals("dummyClientId1",
+        propagatedConf.get(AZURE_AD_CLIENT_ID_KEY));
+    assertEquals("dummyCredential1",
+        propagatedConf.get(AZURE_AD_CLIENT_SECRET_KEY));
+    assertEquals("someValue",
+        propagatedConf.get("some.other.config"));
+
+    propagatedConf =
+        AdlFileSystem.propagateAccountOptions(conf, "dummy2");
+    assertEquals("dummyClientId2",
+        propagatedConf.get(AZURE_AD_CLIENT_ID_KEY));
+    assertEquals("dummyCredential2",
+        propagatedConf.get(AZURE_AD_CLIENT_SECRET_KEY));
+    assertEquals("someValue",
+        propagatedConf.get("some.other.config"));
+
+    propagatedConf =
+        AdlFileSystem.propagateAccountOptions(conf, "anotherDummy");
+    assertEquals("defaultClientId",
+        propagatedConf.get(AZURE_AD_CLIENT_ID_KEY));
+    assertEquals("defaultCredential",
+        propagatedConf.get(AZURE_AD_CLIENT_SECRET_KEY));
+    assertEquals("someValue",
+        propagatedConf.get("some.other.config"));
+  }
+
   private void setDeprecatedKeys(Configuration conf) {
     conf.set("dfs.adls.oauth2.access.token.provider.type", "dummyType");
     conf.set("dfs.adls.oauth2.client.id", "dummyClientId");
@@ -163,19 +251,19 @@ public class TestValidateConfiguration {
   }
 
   private void assertDeprecatedKeys(Configuration conf) {
-    Assert.assertEquals("dummyType",
+    assertEquals("dummyType",
         conf.get(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY));
-    Assert.assertEquals("dummyClientId",
+    assertEquals("dummyClientId",
         conf.get(AZURE_AD_CLIENT_ID_KEY));
-    Assert.assertEquals("dummyRefreshToken",
+    assertEquals("dummyRefreshToken",
         conf.get(AZURE_AD_REFRESH_TOKEN_KEY));
-    Assert.assertEquals("dummyRefreshUrl",
+    assertEquals("dummyRefreshUrl",
         conf.get(AZURE_AD_REFRESH_URL_KEY));
-    Assert.assertEquals("dummyCredential",
+    assertEquals("dummyCredential",
         conf.get(AZURE_AD_CLIENT_SECRET_KEY));
-    Assert.assertEquals("dummyClass",
+    assertEquals("dummyClass",
         conf.get(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY));
-    Assert.assertEquals("dummyTracker",
+    assertEquals("dummyTracker",
         conf.get(LATENCY_TRACKER_KEY));
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/481d79fe/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java
index b08a892..7ac010c 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java
@@ -39,7 +39,7 @@ public class Parallelized extends Parameterized {
   private static class ThreadPoolScheduler implements RunnerScheduler {
     private ExecutorService executor;
 
-    public ThreadPoolScheduler() {
+    ThreadPoolScheduler() {
       int numThreads = 10;
       executor = Executors.newFixedThreadPool(numThreads);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org