You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2008/09/05 12:41:44 UTC
svn commit: r692407 - in /hadoop/core/trunk: CHANGES.txt
src/core/org/apache/hadoop/fs/FileSystem.java
src/core/org/apache/hadoop/fs/GlobExpander.java
src/test/org/apache/hadoop/fs/TestGlobExpander.java
src/test/org/apache/hadoop/fs/TestGlobPaths.java
Author: tomwhite
Date: Fri Sep 5 03:41:43 2008
New Revision: 692407
URL: http://svn.apache.org/viewvc?rev=692407&view=rev
Log:
HADOOP-3498. File globbing alternation should be able to span path components.
Added:
hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java
hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=692407&r1=692406&r2=692407&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Sep 5 03:41:43 2008
@@ -267,6 +267,9 @@
HADOOP-3943. Remove unnecessary synchronization in
NetworkTopology.pseudoSortByDistance. (hairong via omalley)
+ HADOOP-3498. File globbing alternation should be able to span path
+ components. (tomwhite)
+
OPTIMIZATIONS
HADOOP-3556. Removed lock contention in MD5Hash by changing the
Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java?rev=692407&r1=692406&r2=692407&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java Fri Sep 5 03:41:43 2008
@@ -843,7 +843,7 @@
public FileStatus[] globStatus(Path pathPattern) throws IOException {
return globStatus(pathPattern, DEFAULT_FILTER);
}
-
+
/**
* Return an array of FileStatus objects whose path names match pathPattern
* and is accepted by the user-supplied path filter. Results are sorted by
@@ -860,6 +860,24 @@
*/
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
+ String filename = pathPattern.toUri().getPath(); // only the path component of the URI is handed to the expander
+ List<String> filePatterns = GlobExpander.expand(filename); // split curly groups that contain "/" into separate patterns
+ if (filePatterns.size() == 1) { // NOTE(review): a one-element expansion such as "{a/b}" -> "a/b" also lands here and is globbed from the unexpanded pathPattern - confirm intended
+ return globStatusInternal(pathPattern, filter);
+ } else {
+ List<FileStatus> results = new ArrayList<FileStatus>(); // per-pattern matches are appended in expansion order; nothing here re-sorts or de-duplicates them
+ for (String filePattern : filePatterns) {
+ FileStatus[] files = globStatusInternal(new Path(filePattern), filter); // NOTE(review): Path is rebuilt from the path component alone, so a scheme/authority on pathPattern looks to be dropped - confirm
+ for (FileStatus file : files) {
+ results.add(file);
+ }
+ }
+ return results.toArray(new FileStatus[results.size()]);
+ }
+ }
+
+ private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter)
+ throws IOException {
Path[] parents = new Path[1];
int level = 0;
String filename = pathPattern.toUri().getPath();
Added: hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java?rev=692407&view=auto
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java (added)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java Fri Sep 5 03:41:43 2008
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+class GlobExpander { // rewrites curly bracket alternations that contain "/" into separate file patterns
+
+ static class StringWithOffset { // a pattern plus the index from which it still has to be scanned
+ String string;
+ int offset;
+ public StringWithOffset(String string, int offset) {
+ super();
+ this.string = string;
+ this.offset = offset;
+ }
+ }
+
+ /**
+ * Expand globs in the given <code>filePattern</code> into a collection of
+ * file patterns so that in the expanded set no file pattern has a
+ * slash character ("/") in a curly bracket pair.
+ * <p>
+ * Patterns are processed from a work list: each one is passed to
+ * <code>expandLeftmost</code>; a pattern that needs no further expansion
+ * is appended to the result, otherwise its expansions are put back at the
+ * front of the work list, so the result keeps the left-to-right order of
+ * the alternatives. Curly bracket pairs that contain no unescaped slash
+ * are left in place.
+ * @param filePattern the glob pattern to expand
+ * @return expanded file patterns; a single-element list holding
+ * <code>filePattern</code> itself when nothing had to be expanded
+ * @throws IOException if a backslash escape has no character following it
+ */
+ public static List<String> expand(String filePattern) throws IOException {
+ List<String> fullyExpanded = new ArrayList<String>();
+ List<StringWithOffset> toExpand = new ArrayList<StringWithOffset>();
+ toExpand.add(new StringWithOffset(filePattern, 0));
+ while (!toExpand.isEmpty()) {
+ StringWithOffset path = toExpand.remove(0);
+ List<StringWithOffset> expanded = expandLeftmost(path);
+ if (expanded == null) { // null means no curly pair with a slash is left in this pattern
+ fullyExpanded.add(path.string);
+ } else {
+ toExpand.addAll(0, expanded); // re-queue at the front so output order follows alternative order
+ }
+ }
+ return fullyExpanded;
+ }
+
+ /**
+ * Expand the leftmost outer curly bracket pair containing a
+ * slash character ("/") in <code>filePattern</code>.
+ * <p>
+ * The text before the bracket pair is kept as a prefix and the text after
+ * its closing bracket is copied unchanged as a suffix. Inside the pair,
+ * commas at nesting depth one separate the alternatives; nested bracket
+ * pairs and their commas are kept literally within an alternative. A
+ * backslash inside the pair is dropped and the character after it is
+ * copied literally.
+ * @param filePatternWithOffset the pattern to expand together with the
+ * index at which to start looking for a bracket pair
+ * @return one entry per alternative, each being prefix + alternative +
+ * suffix with its offset set to the prefix length, or <code>null</code>
+ * if there is no bracket pair left to expand
+ * @throws IOException if a backslash escape has no character following it
+ */
+ private static List<StringWithOffset> expandLeftmost(StringWithOffset
+ filePatternWithOffset) throws IOException {
+
+ String filePattern = filePatternWithOffset.string;
+ int leftmost = leftmostOuterCurlyContainingSlash(filePattern,
+ filePatternWithOffset.offset);
+ if (leftmost == -1) {
+ return null; // nothing left to expand
+ }
+ int curlyOpen = 0; // current bracket nesting depth
+ StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost));
+ StringBuilder suffix = new StringBuilder();
+ List<String> alts = new ArrayList<String>();
+ StringBuilder alt = new StringBuilder();
+ StringBuilder cur = prefix; // buffer currently appended to; switches to alt, then to suffix
+ for (int i = leftmost; i < filePattern.length(); i++) {
+ char c = filePattern.charAt(i);
+ if (cur == suffix) { // past the closing bracket: copy the rest verbatim
+ cur.append(c);
+ } else if (c == '\\') {
+ i++; // step over the backslash to the escaped character
+ if (i >= filePattern.length()) {
+ throw new IOException("Illegal file pattern: "
+ + "An escaped character does not present for glob "
+ + filePattern + " at " + i);
+ }
+ c = filePattern.charAt(i);
+ cur.append(c);
+ } else if (c == '{') {
+ if (curlyOpen++ == 0) { // outermost opening bracket starts the first alternative
+ alt.setLength(0);
+ cur = alt;
+ } else {
+ cur.append(c);
+ }
+
+ } else if (c == '}' && curlyOpen > 0) {
+ if (--curlyOpen == 0) { // outermost closing bracket ends the last alternative
+ alts.add(alt.toString());
+ alt.setLength(0);
+ cur = suffix;
+ } else {
+ cur.append(c);
+ }
+ } else if (c == ',') {
+ if (curlyOpen == 1) { // top-level comma ends the current alternative
+ alts.add(alt.toString());
+ alt.setLength(0);
+ } else {
+ cur.append(c);
+ }
+ } else {
+ cur.append(c);
+ }
+ }
+ List<StringWithOffset> exp = new ArrayList<StringWithOffset>();
+ for (String string : alts) {
+ exp.add(new StringWithOffset(prefix + string + suffix, prefix.length())); // next scan resumes where the alternative begins
+ }
+ return exp;
+ }
+
+ /**
+ * Finds the index of the leftmost opening curly bracket containing a
+ * slash character ("/") in <code>filePattern</code>.
+ * <p>
+ * Only outermost bracket pairs are reported; a slash anywhere inside the
+ * pair, including inside a nested pair, counts. A character preceded by a
+ * backslash is skipped, so escaped brackets and escaped slashes are
+ * ignored. A closing bracket with no open bracket is ignored as well, and
+ * a pair that is never closed is not reported.
+ * @param filePattern the pattern to scan
+ * @param offset index in <code>filePattern</code> at which scanning starts
+ * @return the index of the leftmost opening curly bracket containing a
+ * slash character ("/"), or -1 if there is no such bracket
+ * @throws IOException if a backslash reached during the scan is the last
+ * character of the pattern
+ */
+ private static int leftmostOuterCurlyContainingSlash(String filePattern,
+ int offset) throws IOException {
+ int curlyOpen = 0; // current bracket nesting depth
+ int leftmost = -1;
+ boolean seenSlash = false;
+ for (int i = offset; i < filePattern.length(); i++) {
+ char c = filePattern.charAt(i);
+ if (c == '\\') {
+ i++; // skip the escaped character
+ if (i >= filePattern.length()) {
+ throw new IOException("Illegal file pattern: "
+ + "An escaped character does not present for glob "
+ + filePattern + " at " + i);
+ }
+ } else if (c == '{') {
+ if (curlyOpen++ == 0) {
+ leftmost = i; // remember where the outermost pair opens
+ }
+ } else if (c == '}' && curlyOpen > 0) {
+ if (--curlyOpen == 0 && leftmost != -1 && seenSlash) {
+ return leftmost;
+ }
+ } else if (c == '/' && curlyOpen > 0) {
+ seenSlash = true;
+ }
+ }
+ return -1;
+ }
+
+}
Added: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java?rev=692407&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java Fri Sep 5 03:41:43 2008
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class TestGlobExpander extends TestCase { // unit tests for GlobExpander.expand
+
+ public void testExpansionIsIdentical() throws IOException { // patterns that must come back unchanged
+ checkExpansionIsIdentical("");
+ checkExpansionIsIdentical("/}"); // stray closing bracket
+ checkExpansionIsIdentical("/}{a,b}"); // slash is outside the bracket pair
+ checkExpansionIsIdentical("{/"); // bracket pair is never closed
+ checkExpansionIsIdentical("{a}");
+ checkExpansionIsIdentical("{a,b}/{b,c}"); // no slash inside either pair
+ checkExpansionIsIdentical("p\\{a/b,c/d\\}s"); // brackets are escaped
+ checkExpansionIsIdentical("p{a\\/b,c\\/d}s"); // slashes are escaped
+ }
+
+ public void testExpansion() throws IOException { // patterns with a slash inside a bracket pair
+ checkExpansion("{a/b}", "a/b");
+ checkExpansion("/}{a/b}", "/}a/b");
+ checkExpansion("p{a/b,c/d}s", "pa/bs", "pc/ds");
+ checkExpansion("{a/b,c/d,{e,f}}", "a/b", "c/d", "{e,f}"); // nested pair without a slash stays one alternative
+ checkExpansion("{a/b,c/d}{e,f}", "a/b{e,f}", "c/d{e,f}"); // trailing pair without a slash is left unexpanded
+ checkExpansion("{a,b}/{b,{c/d,e/f}}", "{a,b}/b", "{a,b}/c/d", "{a,b}/e/f"); // nested pair with slashes is expanded in a later pass
+ checkExpansion("{a,b}/{c/\\d}", "{a,b}/c/d"); // the backslash is dropped from the expansion
+ }
+
+ private void checkExpansionIsIdentical(String filePattern) throws IOException { // expects the pattern to be its own sole expansion
+ checkExpansion(filePattern, filePattern);
+ }
+
+ private void checkExpansion(String filePattern, String... expectedExpansions) // expects exactly these expansions, in this order
+ throws IOException {
+ List<String> actualExpansions = GlobExpander.expand(filePattern);
+ assertEquals("Different number of expansions", expectedExpansions.length,
+ actualExpansions.size());
+ for (int i = 0; i < expectedExpansions.length; i++) {
+ assertEquals("Expansion of " + filePattern, expectedExpansions[i],
+ actualExpansions.get(i));
+ }
+ }
+}
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=692407&r1=692406&r2=692407&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Fri Sep 5 03:41:43 2008
@@ -266,6 +266,28 @@
} finally {
cleanupDFS();
}
+ // cross-component curlies
+ try {
+ files = new String[] { USER_DIR+"/a/b", USER_DIR+"/a/d",
+ USER_DIR+"/c/b", USER_DIR+"/c/d" };
+ matchedPath = prepareTesting(USER_DIR+"/{a/b,c/d}", files);
+ assertEquals(matchedPath.length, 2);
+ assertEquals(matchedPath[0], path[0]);
+ assertEquals(matchedPath[1], path[3]);
+ } finally {
+ cleanupDFS();
+ }
+ // cross-component absolute curlies
+ try {
+ files = new String[] { "/a/b", "/a/d",
+ "/c/b", "/c/d" };
+ matchedPath = prepareTesting("{/a/b,/c/d}", files);
+ assertEquals(matchedPath.length, 2);
+ assertEquals(matchedPath[0], path[0]);
+ assertEquals(matchedPath[1], path[3]);
+ } finally {
+ cleanupDFS();
+ }
try {
// test standalone }
files = new String[] {USER_DIR+"/}bc", USER_DIR+"/}c"};