You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 05:20:56 UTC
svn commit: r1077497 - in
/hadoop/common/branches/branch-0.20-security-patches/src:
core/org/apache/hadoop/fs/ test/org/apache/hadoop/fs/
Author: omalley
Date: Fri Mar 4 04:20:55 2011
New Revision: 1077497
URL: http://svn.apache.org/viewvc?rev=1077497&view=rev
Log:
commit 787be7e13d1edb7f9db39a0fc365fd4d7f7c04d7
Author: Luke Lu <ll...@yahoo-inc.com>
Date: Thu Jun 3 17:10:56 2010 -0700
HADOOP-6787 from https://issues.apache.org/jira/secure/attachment/12446045/hadoop-6787-y20s-v1.patch
Glob pattern code refactor/fixes.
Added:
hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobFilter.java
hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobPattern.java
hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/fs/TestGlobPattern.java
Modified:
hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/FileSystem.java
Modified: hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/FileSystem.java?rev=1077497&r1=1077496&r2=1077497&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/FileSystem.java Fri Mar 4 04:20:55 2011
@@ -998,127 +998,6 @@ public abstract class FileSystem extends
}
return globPathsLevel(parents, filePattern, level + 1, hasGlob);
}
-
- /* A class that could decide if a string matches the glob or not */
- private static class GlobFilter implements PathFilter {
- private PathFilter userFilter = DEFAULT_FILTER;
- private Pattern regex;
- private boolean hasPattern = false;
-
- /** Default pattern character: Escape any special meaning. */
- private static final char PAT_ESCAPE = '\\';
- /** Default pattern character: Any single character. */
- private static final char PAT_ANY = '.';
- /** Default pattern character: Character set close. */
- private static final char PAT_SET_CLOSE = ']';
-
- GlobFilter(String filePattern) throws IOException {
- setRegex(filePattern);
- }
-
- GlobFilter(String filePattern, PathFilter filter) throws IOException {
- userFilter = filter;
- setRegex(filePattern);
- }
-
- private boolean isJavaRegexSpecialChar(char pChar) {
- return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' ||
- pChar == '|' || pChar == '+';
- }
- void setRegex(String filePattern) throws IOException {
- int len;
- int setOpen;
- int curlyOpen;
- boolean setRange;
-
- StringBuilder fileRegex = new StringBuilder();
-
- // Validate the pattern
- len = filePattern.length();
- if (len == 0)
- return;
-
- setOpen = 0;
- setRange = false;
- curlyOpen = 0;
-
- for (int i = 0; i < len; i++) {
- char pCh;
-
- // Examine a single pattern character
- pCh = filePattern.charAt(i);
- if (pCh == PAT_ESCAPE) {
- fileRegex.append(pCh);
- i++;
- if (i >= len)
- error("An escaped character does not present", filePattern, i);
- pCh = filePattern.charAt(i);
- } else if (isJavaRegexSpecialChar(pCh)) {
- fileRegex.append(PAT_ESCAPE);
- } else if (pCh == '*') {
- fileRegex.append(PAT_ANY);
- hasPattern = true;
- } else if (pCh == '?') {
- pCh = PAT_ANY;
- hasPattern = true;
- } else if (pCh == '{') {
- fileRegex.append('(');
- pCh = '(';
- curlyOpen++;
- hasPattern = true;
- } else if (pCh == ',' && curlyOpen > 0) {
- fileRegex.append(")|");
- pCh = '(';
- } else if (pCh == '}' && curlyOpen > 0) {
- // End of a group
- curlyOpen--;
- fileRegex.append(")");
- pCh = ')';
- } else if (pCh == '[' && setOpen == 0) {
- setOpen++;
- hasPattern = true;
- } else if (pCh == '^' && setOpen > 0) {
- } else if (pCh == '-' && setOpen > 0) {
- // Character set range
- setRange = true;
- } else if (pCh == PAT_SET_CLOSE && setRange) {
- // Incomplete character set range
- error("Incomplete character set range", filePattern, i);
- } else if (pCh == PAT_SET_CLOSE && setOpen > 0) {
- // End of a character set
- if (setOpen < 2)
- error("Unexpected end of set", filePattern, i);
- setOpen = 0;
- } else if (setOpen > 0) {
- // Normal character, or the end of a character set range
- setOpen++;
- setRange = false;
- }
- fileRegex.append(pCh);
- }
-
- // Check for a well-formed pattern
- if (setOpen > 0 || setRange || curlyOpen > 0) {
- // Incomplete character set or character range
- error("Expecting set closure character or end of range, or }",
- filePattern, len);
- }
- regex = Pattern.compile(fileRegex.toString());
- }
-
- boolean hasPattern() {
- return hasPattern;
- }
-
- public boolean accept(Path path) {
- return regex.matcher(path.getName()).matches() && userFilter.accept(path);
- }
-
- private void error(String s, String pattern, int pos) throws IOException {
- throw new IOException("Illegal file pattern: "
- +s+ " for glob "+ pattern + " at " + pos);
- }
- }
/** Return the current user's home directory in this filesystem.
* The default implementation returns "/user/$USER/".
Added: hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobFilter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobFilter.java?rev=1077497&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobFilter.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobFilter.java Fri Mar 4 04:20:55 2011
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.util.regex.PatternSyntaxException;
+import java.io.IOException;
+
+ // PathFilter that accepts a path when path.getName() matches a glob AND a secondary filter also accepts it
+class GlobFilter implements PathFilter {
+ private final static PathFilter DEFAULT_FILTER = new PathFilter() { // accept-everything filter used by the one-argument constructor
+ public boolean accept(Path file) {
+ return true;
+ }
+ };
+
+ private PathFilter userFilter = DEFAULT_FILTER; // secondary filter consulted after the glob match in accept()
+ private GlobPattern pattern; // compiled glob; assigned in init()
+
+ GlobFilter(String filePattern) throws IOException { // glob only: paired with DEFAULT_FILTER
+ init(filePattern, DEFAULT_FILTER);
+ }
+
+ GlobFilter(String filePattern, PathFilter filter) throws IOException { // glob plus a caller-supplied filter
+ init(filePattern, filter);
+ }
+
+ void init(String filePattern, PathFilter filter) throws IOException { // stores the filter, then compiles the glob
+ try {
+ userFilter = filter;
+ pattern = new GlobPattern(filePattern);
+ }
+ catch (PatternSyntaxException e) { // GlobPattern signals a malformed glob with this unchecked exception
+ // Callers expect an IOException whose message starts with "Illegal file pattern", so translate it here
+ throw new IOException("Illegal file pattern: "+ e.getMessage(), e);
+ }
+ }
+
+ boolean hasPattern() { // delegates to GlobPattern.hasWildcard()
+ return pattern.hasWildcard();
+ }
+
+ public boolean accept(Path path) { // glob is tested against path.getName() first; && short-circuits before the user filter
+ return pattern.matches(path.getName()) && userFilter.accept(path);
+ }
+}
Added: hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobPattern.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobPattern.java?rev=1077497&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobPattern.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/GlobPattern.java Fri Mar 4 04:20:55 2011
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * A POSIX-style glob pattern with brace expansions, translated by {@link #set} into a java.util.regex.Pattern.
+ */
+public class GlobPattern {
+ private static final char BACKSLASH = '\\'; // glob escape character; also emitted to escape regex metacharacters
+ private Pattern compiled; // regex assigned at the end of set()
+ private boolean hasWildcard = false; // set by set() when the glob has an unescaped *, ?, { or [
+
+ /**
+ * Construct the glob pattern object by compiling the given glob string via {@link #set}
+ * @param globPattern the glob pattern string; set() raises PatternSyntaxException if it is malformed
+ */
+ public GlobPattern(String globPattern) {
+ set(globPattern);
+ }
+
+ /**
+ * @return the java.util.regex.Pattern produced by the most recent call to set
+ */
+ public Pattern compiled() {
+ return compiled;
+ }
+
+ /**
+ * Compile a glob pattern string straight to a regex, discarding the intermediate GlobPattern object
+ * @param globPattern the glob pattern
+ * @return the compiled java.util.regex.Pattern
+ */
+ public static Pattern compile(String globPattern) {
+ return new GlobPattern(globPattern).compiled();
+ }
+
+ /**
+ * Match the whole input against the compiled glob pattern (uses Matcher.matches())
+ * @param s input chars
+ * @return true if the entire input matches
+ */
+ public boolean matches(CharSequence s) {
+ return compiled.matcher(s).matches();
+ }
+
+ /**
+ * Translate a glob into regex text one character at a time, compile it, and recompute the wildcard flag
+ * @param glob the glob pattern string; a trailing backslash, a '[' inside a set, or an unclosed '[' or '{' raises PatternSyntaxException
+ */
+ public void set(String glob) {
+ StringBuilder regex = new StringBuilder();
+ int setOpen = 0; // > 0 while inside a [...] character class
+ int curlyOpen = 0; // nesting depth of {...} groups
+ int len = glob.length();
+ hasWildcard = false;
+
+ for (int i = 0; i < len; i++) {
+ char c = glob.charAt(i);
+
+ switch (c) { // 'continue' = regex text already emitted for c; 'break' = c itself is appended after the switch
+ case BACKSLASH:
+ if (++i >= len) { // a backslash must be followed by the character it escapes
+ error("Missing escaped character", glob, i);
+ }
+ regex.append(c).append(glob.charAt(i)); // the escape pair is copied through unchanged
+ continue;
+ case '.':
+ case '$':
+ case '(':
+ case ')':
+ case '|':
+ case '+':
+ // escape regex special chars that are not glob special chars
+ regex.append(BACKSLASH);
+ break;
+ case '*':
+ regex.append('.'); // combined with the '*' appended after the switch this yields ".*"
+ hasWildcard = true;
+ break;
+ case '?':
+ regex.append('.'); // '?' is replaced by a single '.'
+ hasWildcard = true;
+ continue;
+ case '{': // start of a group
+ regex.append("(?:"); // non-capturing
+ curlyOpen++;
+ hasWildcard = true;
+ continue;
+ case ',':
+ regex.append(curlyOpen > 0 ? '|' : c); // alternation separator inside {...}, literal comma otherwise
+ continue;
+ case '}':
+ if (curlyOpen > 0) {
+ // end of a group
+ curlyOpen--;
+ regex.append(")");
+ continue;
+ }
+ break; // a '}' with no open group is appended as-is
+ case '[':
+ if (setOpen > 0) { // a '[' while a set is already open is rejected
+ error("Unclosed character class", glob, i);
+ }
+ setOpen++;
+ hasWildcard = true;
+ break;
+ case '^': // ^ inside [...] can be unescaped
+ if (setOpen == 0) {
+ regex.append(BACKSLASH);
+ }
+ break;
+ case '!': // [! needs to be translated to [^
+ regex.append(setOpen > 0 && '[' == glob.charAt(i - 1) ? '^' : '!'); // only a '!' directly after '[' negates
+ continue;
+ case ']':
+ // Many set errors like [][] could not be easily detected here,
+ // as []], []-] and [-] are all valid POSIX glob and java regex.
+ // We'll just let the regex compiler do the real work.
+ setOpen = 0;
+ break;
+ default: // any other character is appended as-is below
+ }
+ regex.append(c);
+ }
+
+ if (setOpen > 0) {
+ error("Unclosed character class", glob, len);
+ }
+ if (curlyOpen > 0) {
+ error("Unclosed group", glob, len);
+ }
+ compiled = Pattern.compile(regex.toString()); // NOTE(review): no Pattern.DOTALL, so '.' (from * and ?) would not match line terminators — confirm intended
+ }
+
+ /**
+ * @return true if the last glob given to set contained an unescaped *, ?, { or [
+ */
+ public boolean hasWildcard() {
+ return hasWildcard;
+ }
+
+ private static void error(String message, String pattern, int pos) { // always throws; carries the glob and the offending index
+ throw new PatternSyntaxException(message, pattern, pos);
+ }
+}
Added: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/fs/TestGlobPattern.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/fs/TestGlobPattern.java?rev=1077497&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/fs/TestGlobPattern.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/fs/TestGlobPattern.java Fri Mar 4 04:20:55 2011
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.util.regex.PatternSyntaxException;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Tests for GlobPattern: globs that must compile and match (or not match) given inputs, and globs that must be rejected
+ */
+public class TestGlobPattern {
+ private void assertMatch(boolean yes, String glob, String...input) { // asserts every input matches (yes) or does not match (!yes) the glob
+ GlobPattern pattern = new GlobPattern(glob);
+
+ for (String s : input) {
+ boolean result = pattern.matches(s);
+ assertTrue(glob +" should"+ (yes ? "" : " not") +" match "+ s,
+ yes ? result : !result);
+ }
+ }
+
+ private void shouldThrow(String... globs) { // asserts each glob is rejected with PatternSyntaxException
+ for (String glob : globs) {
+ try {
+ GlobPattern.compile(glob);
+ }
+ catch (PatternSyntaxException e) {
+ e.printStackTrace(); // expected failure; the trace is printed for diagnostics only
+ continue; // expected outcome: move on to the next glob
+ }
+ assertTrue("glob "+ glob +" should throw", false); // reached only when compile() did not throw
+ }
+ }
+
+ @Test public void testValidPatterns() {
+ assertMatch(true, "*", "^$", "foo", "bar");
+ assertMatch(true, "?", "?", "^", "[", "]", "$");
+ assertMatch(true, "foo*", "foo", "food", "fool");
+ assertMatch(true, "f*d", "fud", "food");
+ assertMatch(true, "*d", "good", "bad");
+ assertMatch(true, "\\*\\?\\[\\{\\\\", "*?[{\\"); // backslash-escaped glob metacharacters match literally
+ assertMatch(true, "[]^-]", "]", "-", "^");
+ assertMatch(true, "]", "]");
+ assertMatch(true, "^.$()|+", "^.$()|+"); // regex metacharacters are plain characters in a glob
+ assertMatch(true, "[^^]", ".", "$", "[", "]");
+ assertMatch(false, "[^^]", "^");
+ assertMatch(true, "[!!-]", "^", "?"); // a '!' directly after '[' negates the set
+ assertMatch(false, "[!!-]", "!", "-");
+ assertMatch(true, "{[12]*,[45]*,[78]*}", "1", "2!", "4", "42", "7", "7$");
+ assertMatch(false, "{[12]*,[45]*,[78]*}", "3", "6", "9Ã"); // NOTE(review): last input looks like mis-decoded UTF-8 from the mail archive (possibly 9ß) — confirm against the repository
+ assertMatch(true, "}", "}");
+ }
+
+ @Test public void testInvalidPatterns() {
+ shouldThrow("[", "[[]]", "[][]", "{", "\\"); // malformed globs: each must raise PatternSyntaxException
+ }
+}