You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ge...@apache.org on 2015/02/06 09:33:23 UTC
hadoop git commit: HADOOP-11506. Configuration variable expansion
regex expensive for long values. (Gera Shegalov via gera)
Repository: hadoop
Updated Branches:
refs/heads/trunk 02f154a00 -> 644548f20
HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/644548f2
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/644548f2
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/644548f2
Branch: refs/heads/trunk
Commit: 644548f201743408904dfe24b9f5b515b2c96713
Parents: 02f154a
Author: Gera Shegalov <ge...@apache.org>
Authored: Fri Jan 30 23:18:03 2015 -0800
Committer: Gera Shegalov <ge...@apache.org>
Committed: Fri Feb 6 00:27:54 2015 -0800
----------------------------------------------------------------------
hadoop-common-project/hadoop-common/CHANGES.txt | 3 +
.../org/apache/hadoop/conf/Configuration.java | 124 ++++++++++++++++---
.../apache/hadoop/conf/TestConfiguration.java | 54 +++++++-
3 files changed, 159 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/644548f2/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 44adf7f..66c2cba 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -573,6 +573,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11188. hadoop-azure: automatically expand page blobs when they become
full. (Eric Hanson via cnauroth)
+ HADOOP-11506. Configuration variable expansion regex expensive for long
+ values. (Gera Shegalov via gera)
+
BUG FIXES
HADOOP-11488. Difference in default connection timeout for S3A FS
http://git-wip-us.apache.org/repos/asf/hadoop/blob/644548f2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 8f98d0a..ea0d3a6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -187,7 +187,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
"testingforemptydefaultvalue";
private boolean allowNullValueProperties = false;
-
+
private static class Resource {
private final Object resource;
private final String name;
@@ -845,31 +845,106 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
resources.add(resource); // add to resources
reloadConfiguration();
}
-
- private static final Pattern VAR_PATTERN =
- Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
private static final int MAX_SUBST = 20;
+ private static final int SUB_START_IDX = 0;
+ private static final int SUB_END_IDX = SUB_START_IDX + 1;
+
+ /**
+ * This is a manual implementation of the following regex
+ * "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
+ * a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
+ * Hadoop apps building on the assumption Configuration#get is an O(1)
+ * hash table lookup, especially when the eval is a long string.
+ *
+ * @param eval a string that may contain variables requiring expansion.
+ * @return a 2-element int array res such that
+ * eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
+ * ${var} in eval. If no variable is found -1, -1 is returned.
+ */
+ private static int[] findSubVariable(String eval) {
+ int[] result = {-1, -1};
+
+ int matchStart;
+ int leftBrace;
+
+ // scanning for a brace first because it's less frequent than $
+ // that can occur in nested class names
+ //
+ match_loop:
+ for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
+ // minimum left brace position (follows '$')
+ leftBrace > 0
+ // right brace of a smallest valid expression "${c}"
+ && leftBrace + "{c".length() < eval.length();
+ leftBrace = eval.indexOf('{', matchStart)) {
+ int matchedLen = 0;
+ if (eval.charAt(leftBrace - 1) == '$') {
+ int subStart = leftBrace + 1; // after '{'
+ for (int i = subStart; i < eval.length(); i++) {
+ switch (eval.charAt(i)) {
+ case '}':
+ if (matchedLen > 0) { // match
+ result[SUB_START_IDX] = subStart;
+ result[SUB_END_IDX] = subStart + matchedLen;
+ break match_loop;
+ }
+ // fall through to skip 1 char
+ case ' ':
+ case '$':
+ matchStart = i + 1;
+ continue match_loop;
+ default:
+ matchedLen++;
+ }
+ }
+ // scanned from "${" to the end of eval, and no reset via ' ', '$':
+ // no match!
+ break match_loop;
+ } else {
+ // not a start of a variable
+ //
+ matchStart = leftBrace + 1;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Attempts to repeatedly expand the value {@code expr} by replacing the
+ * left-most substring of the form "${var}" in the following precedence order
+ * <ol>
+ * <li>by the value of the Java system property "var" if defined</li>
+ * <li>by the value of the configuration key "var" if defined</li>
+ * </ol>
+ *
+ * If var is unbounded the current state of expansion "prefix${var}suffix" is
+ * returned.
+ *
+ * If a cycle is detected: replacing var1 requires replacing var2 ... requires
+ * replacing var1, i.e., the cycle is shorter than
+ * {@link Configuration#MAX_SUBST} then the original expr is returned.
+ *
+ * @param expr the literal value of a config key
+ * @return null if expr is null, otherwise the value resulting from expanding
+ * expr using the algorithm above.
+ * @throws IllegalArgumentException when more than
+ * {@link Configuration#MAX_SUBST} replacements are required
+ */
private String substituteVars(String expr) {
if (expr == null) {
return null;
}
- Matcher match = VAR_PATTERN.matcher("");
String eval = expr;
- Set<String> evalSet = new HashSet<String>();
- for(int s=0; s<MAX_SUBST; s++) {
- if (evalSet.contains(eval)) {
- // Cyclic resolution pattern detected. Return current expression.
+ Set<String> evalSet = null;
+ for(int s = 0; s < MAX_SUBST; s++) {
+ final int[] varBounds = findSubVariable(eval);
+ if (varBounds[SUB_START_IDX] == -1) {
return eval;
}
- evalSet.add(eval);
- match.reset(eval);
- if (!match.find()) {
- return eval;
- }
- String var = match.group();
- var = var.substring(2, var.length()-1); // remove ${ .. }
+ final String var = eval.substring(varBounds[SUB_START_IDX],
+ varBounds[SUB_END_IDX]);
String val = null;
try {
val = System.getProperty(var);
@@ -882,8 +957,23 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
if (val == null) {
return eval; // return literal ${var}: var is unbound
}
+
+ // prevent recursive resolution
+ //
+ final int dollar = varBounds[SUB_START_IDX] - "${".length();
+ final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
+ final String refVar = eval.substring(dollar, afterRightBrace);
+ if (evalSet == null) {
+ evalSet = new HashSet<String>();
+ }
+ if (!evalSet.add(refVar)) {
+ return expr; // return original expression if there is a loop
+ }
+
// substitute
- eval = eval.substring(0, match.start())+val+eval.substring(match.end());
+ eval = eval.substring(0, dollar)
+ + val
+ + eval.substring(afterRightBrace);
}
throw new IllegalStateException("Variable substitution depth too large: "
+ MAX_SUBST + " " + expr);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/644548f2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
index 7b4fbb5..b84045d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
@@ -1271,12 +1271,56 @@ public class TestConfiguration extends TestCase {
}
public void testInvalidSubstitutation() {
+ final Configuration configuration = new Configuration(false);
+
+ // 2-var loops
+ //
+ final String key = "test.random.key";
+ for (String keyExpression : Arrays.asList(
+ "${" + key + "}",
+ "foo${" + key + "}",
+ "foo${" + key + "}bar",
+ "${" + key + "}bar")) {
+ configuration.set(key, keyExpression);
+ assertEquals("Unexpected value", keyExpression, configuration.get(key));
+ }
+
+ //
+ // 3-variable loops
+ //
+
+ final String expVal1 = "${test.var2}";
+ String testVar1 = "test.var1";
+ configuration.set(testVar1, expVal1);
+ configuration.set("test.var2", "${test.var3}");
+ configuration.set("test.var3", "${test.var1}");
+ assertEquals("Unexpected value", expVal1, configuration.get(testVar1));
+
+ // 3-variable loop with non-empty value prefix/suffix
+ //
+ final String expVal2 = "foo2${test.var2}bar2";
+ configuration.set(testVar1, expVal2);
+ configuration.set("test.var2", "foo3${test.var3}bar3");
+ configuration.set("test.var3", "foo1${test.var1}bar1");
+ assertEquals("Unexpected value", expVal2, configuration.get(testVar1));
+ }
+
+ public void testIncompleteSubbing() {
+ Configuration configuration = new Configuration(false);
String key = "test.random.key";
- String keyExpression = "${" + key + "}";
- Configuration configuration = new Configuration();
- configuration.set(key, keyExpression);
- String value = configuration.get(key);
- assertTrue("Unexpected value " + value, value.equals(keyExpression));
+ for (String keyExpression : Arrays.asList(
+ "{}",
+ "${}",
+ "{" + key,
+ "${" + key,
+ "foo${" + key,
+ "foo${" + key + "bar",
+ "foo{" + key + "}bar",
+ "${" + key + "bar")) {
+ configuration.set(key, keyExpression);
+ String value = configuration.get(key);
+ assertTrue("Unexpected value " + value, value.equals(keyExpression));
+ }
}
public void testBoolean() {