You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/09/26 23:13:34 UTC

svn commit: r450208 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/DFSShell.java src/java/org/apache/hadoop/fs/FileSystem.java src/java/org/apache/hadoop/fs/LocalFileSystem.java src/test/org/apache/hadoop/fs/TestGlobPaths.java

Author: cutting
Date: Tue Sep 26 14:13:34 2006
New Revision: 450208

URL: http://svn.apache.org/viewvc?view=rev&rev=450208
Log:
HADOOP-559.  Add file name globbing (pattern matching) support to the FileSystem API and use it in DFSShell ('bin/hadoop dfs') commands.  Contributed by Hairong.

Added:
    lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=450208&r1=450207&r2=450208
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Sep 26 14:13:34 2006
@@ -76,6 +76,10 @@
 19. HADOOP-487.  Throw a more informative exception for unknown RPC
     hosts.  (Sameer Paranjpye via cutting)
 
+20. HADOOP-559.  Add file name globbing (pattern matching) support to
+    the FileSystem API, and use it in DFSShell ('bin/hadoop dfs')
+    commands.  (Hairong Kuang via cutting)
+
 
 Release 0.6.2 (unreleased)
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java?view=diff&rev=450208&r1=450207&r2=450208
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java Tue Sep 26 14:13:34 2006
@@ -53,17 +53,35 @@
     }
 
     /**
-     * Obtain the indicated DFS file and copy to the local name.
-     * srcf is kept.
-     */
-    void copyToLocal(String srcf, Path dst) throws IOException {
-        fs.copyToLocalFile(new Path(srcf), dst);
+     * Obtain the indicated DFS files that match the file pattern <i>srcf</i>
+     * and copy them to the local name. srcf is kept.
+     * When copying multiple files, the destination must be a directory.
+     * Otherwise, IOException is thrown.
+     * @param srcf: a file pattern specifying source files
+     * @param dstf: a destination local file/directory 
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
+     */
+    void copyToLocal(String srcf, String dstf) throws IOException {
+      Path [] srcs = fs.globPaths( new Path(srcf) );
+      if( srcs.length > 1 && !new File( dstf ).isDirectory()) {
+        throw new IOException( "When copy multiple files, " 
+            + "destination should be a directory." );
+      }
+      Path dst = new Path( dstf );
+      for( int i=0; i<srcs.length; i++ ) {
+        fs.copyToLocalFile( srcs[i], dst );
+      }
     }
     
     /**
-     * Get all the files in the directory and output them to
-     * only one file on local fs 
+     * Get all the files in the directories that match the source file 
+     * pattern and merge and sort them to only one file on local fs 
      * srcf is kept.
+     * @param srcf: a file pattern specifying source files
+     * @param dst: a destination local file/directory
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
      */
     void copyMergeToLocal(String srcf, Path dst) throws IOException {
         copyMergeToLocal(srcf, dst, false);
@@ -71,21 +89,29 @@
     
 
     /**
-     * Get all the files in the directory and output them to
-     * only one file on local fs 
+     * Get all the files in the directories that match the source file pattern
+     * and merge and sort them to only one file on local fs 
      * srcf is kept.
      * 
      * Also adds a string between the files (useful for adding \n
      * to a text file)
+     * @param srcf: a file pattern specifying source files
+     * @param dst: a destination local file/directory
+     * @param endline: if an end of line character is added to a text file 
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
      */
     void copyMergeToLocal(String srcf, Path dst, boolean endline) throws IOException {
+      Path [] srcs = fs.globPaths( new Path( srcf ) );
+      for( int i=0; i<srcs.length; i++ ) {
         if(endline) {
-            FileUtil.copyMerge(fs, new Path(srcf), 
+            FileUtil.copyMerge(fs, srcs[i], 
                     FileSystem.getNamed("local", conf), dst, false, conf, "\n");
         } else {
-            FileUtil.copyMerge(fs, new Path(srcf), 
+            FileUtil.copyMerge(fs, srcs[i], 
                     FileSystem.getNamed("local", conf), dst, false, conf, null);
         }
+      }
     }      
 
     /**
@@ -96,8 +122,24 @@
         System.err.println("Option '-moveToLocal' is not implemented yet.");
     }
 
+    /**
+     * Fetch all DFS files that match the file pattern <i>srcf</i> and display
+     * their content on stdout. 
+     * @param srcf: a file pattern specifying source files
+     * @exception: IOException
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
+     */
     void cat(String srcf) throws IOException {
-      FSDataInputStream in = fs.open(new Path(srcf));
+      Path [] srcs = fs.globPaths( new Path( srcf ) );
+      for( int i=0; i<srcs.length; i++ ) {
+        cat(srcs[i]);
+      }
+    }
+    
+
+    /* print the content of src to screen */
+    private void cat(Path src) throws IOException {
+      FSDataInputStream in = fs.open(src);
       try {
         BufferedReader din = new BufferedReader(new InputStreamReader(in));
         String line;
@@ -138,15 +180,29 @@
         System.exit(-1);
       }
       
-      setReplication(rep, new Path(cmd[pos]), recursive);
+      setReplication(rep, cmd[pos], recursive);
     }
     
     /**
-     * Set the replication for the path argument
+     * Set the replication for files that match file pattern <i>srcf</i>
      * if it's a directory and recursive is true,
      * set replication for all the subdirs and those files too
-     */
-    public void setReplication(short newRep, Path src, boolean recursive) throws IOException {
+     * @param newRep: new replication factor
+     * @param srcf: a file pattern specifying source files
+     * @param recursive: if need to set replication factor for files in subdirs
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
+     */
+    public void setReplication(short newRep, String srcf, boolean recursive)
+        throws IOException {
+      Path[] srcs = fs.globPaths( new Path(srcf) );
+      for( int i=0; i<srcs.length; i++ ) {
+        setReplication( newRep, srcs[i], recursive );
+      }
+    }
+    
+    private void setReplication(short newRep, Path src, boolean recursive)
+      throws IOException {
   	
     	if(!fs.isDirectory(src)) {
     		setFileReplication(src, newRep);
@@ -172,8 +228,8 @@
     /**
      * Actually set the replication for this file
      * If it fails either throw IOException or print an error msg
-     * @param file
-     * @param newRep
+     * @param file: a dfs file/directory
+     * @param newRep: new replication factor
      * @throws IOException
      */
     private void setFileReplication(Path file, short newRep) throws IOException {
@@ -187,14 +243,27 @@
     
     
     /**
-     * Get a listing of all files in DFS at the indicated name
-     */
-    public void ls(String src, boolean recursive) throws IOException {
-        Path items[] = fs.listPaths(new Path(src));
+     * Get a listing of all files in DFS that match the file pattern <i>srcf</i>
+     * @param srcf: a file pattern specifying source files
+     * @param recursive: if need to list files in subdirs
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
+     */
+    public void ls(String srcf, boolean recursive) throws IOException {
+      Path[] srcs = fs.globPaths( new Path(srcf) );
+      boolean printHeader = (srcs.length == 1) ? true: false;
+      for(int i=0; i<srcs.length; i++) {
+        ls(srcs[i], recursive, printHeader);
+      }
+    }
+
+    /* list all files in dfs under the directory <i>src</i>*/
+    private void ls(Path src, boolean recursive, boolean printHeader ) throws IOException {
+        Path items[] = fs.listPaths(src);
         if (items == null) {
             System.out.println("Could not get listing for " + src);
         } else {
-            if(!recursive) {
+            if(!recursive && printHeader ) {
             	System.out.println("Found " + items.length + " items");
             }
             for (int i = 0; i < items.length; i++) {
@@ -205,16 +274,20 @@
                                         ("<r " + fs.getReplication(cur) 
                                             + ">\t" + fs.getLength(cur))));
                 if(recursive && fs.isDirectory(cur)) {
-                  ls(cur.toString(), recursive);
+                  ls(cur, recursive, printHeader);
                 }
             }
         }
     }
 
     /**
+     * Show the size of all files in DFS that match the file pattern <i>srcf</i>
+     * @param srcf: a file pattern specifying source files
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
      */
     public void du(String src) throws IOException {
-        Path items[] = fs.listPaths(new Path(src));
+        Path items[] = fs.listPaths( fs.globPaths( new Path(src) ) );
         if (items == null) {
             System.out.println("Could not get listing for " + src);
         } else {
@@ -235,38 +308,79 @@
     }
     
     /**
-     * Rename an DFS file
+     * Move DFS files that match the file pattern <i>srcf</i>
+     * to a destination dfs file.
+     * When moving multiple files, the destination must be a directory.
+     * Otherwise, IOException is thrown.
+     * @param srcf: a file pattern specifying source files
+     * @param dstf: a destination dfs file/directory
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
      */
     public void rename(String srcf, String dstf) throws IOException {
-        if (fs.rename(new Path(srcf), new Path(dstf))) {
-            System.out.println("Renamed " + srcf + " to " + dstf);
+      Path [] srcs = fs.globPaths( new Path(srcf) );
+      Path dst = new Path(dstf);
+      if( srcs.length > 1 && !fs.isDirectory(dst)) {
+        throw new IOException( "When moving multiple files, " 
+            + "destination should be a directory." );
+      }
+      for( int i=0; i<srcs.length; i++ ) {
+        if (fs.rename(srcs[i], dst)) {
+            System.out.println("Renamed " + srcs[i] + " to " + dstf);
         } else {
-            System.out.println("Rename failed");
+            System.out.println("Rename failed " + srcs[i]);
         }
+      }
     }
 
     /**
-     * Copy an DFS file
+     * Copy DFS files that match the file pattern <i>srcf</i>
+     * to a destination dfs file.
+     * When copying multiple files, the destination must be a directory.
+     * Otherwise, IOException is thrown.
+     * @param srcf: a file pattern specifying source files
+     * @param dstf: a destination dfs file/directory
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
      */
     public void copy(String srcf, String dstf, Configuration conf) throws IOException {
-      FileUtil.copy(fs, new Path(srcf), fs, new Path(dstf), false, conf);
+      Path [] srcs = fs.globPaths( new Path(srcf) );
+      Path dst = new Path(dstf);
+      if( srcs.length > 1 && !fs.isDirectory(dst)) {
+        throw new IOException( "When copying multiple files, " 
+            + "destination should be a directory." );
+      }
+      for( int i=0; i<srcs.length; i++ ) {
+        FileUtil.copy(fs, srcs[i], fs, dst, false, conf);
+      }
     }
 
     /**
-     * Delete an DFS file
+     * Delete all files in DFS that match the file pattern <i>srcf</i>
+     * @param srcf: a file pattern specifying source files
+     * @param recursive: if need to delete subdirs
+     * @exception: IOException  
+     * @see org.apache.hadoop.fs.FileSystem.globPaths 
      */
     public void delete(String srcf, boolean recursive) throws IOException {
-      Path srcp = new Path(srcf);
-      if (fs.isDirectory(srcp) && !recursive) {
-        System.out.println("Cannot remove directory \"" + srcf +
+      Path [] srcs = fs.globPaths( new Path(srcf) );
+      for( int i=0; i<srcs.length; i++ ) {
+        delete(srcs[i], recursive);
+      }
+    }
+    
+    /* delete a DFS file */
+    private void delete(Path src, boolean recursive ) throws IOException {
+      if (fs.isDirectory(src) && !recursive) {
+        System.out.println("Cannot remove directory \"" + src +
                            "\", use -rmr instead");
         return;
       }
 
-      if (fs.delete(srcp)) {
-        System.out.println("Deleted " + srcf);
+      if (fs.delete(src)) {
+        System.out.println("Deleted " + src);
       } else {
-        System.out.println("Delete failed");
+        System.out.println("Delete failed " + src);
       }
     }
 
@@ -403,7 +517,7 @@
             } else if ("-moveFromLocal".equals(cmd)) {
                 moveFromLocal(new Path(argv[i++]), argv[i++]);
             } else if ("-get".equals(cmd) || "-copyToLocal".equals(cmd)) {
-                copyToLocal(argv[i++], new Path(argv[i++]));
+                copyToLocal(argv[i++], argv[i++]);
             } else if ("-getmerge".equals(cmd)) {
                 if(argv.length>i+2)
                     copyMergeToLocal(argv[i++], new Path(argv[i++]), Boolean.parseBoolean(argv[i++]));

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?view=diff&rev=450208&r1=450207&r2=450208
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Tue Sep 26 14:13:34 2006
@@ -18,6 +18,7 @@
 import java.io.*;
 import java.net.*;
 import java.util.*;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.*;
 
@@ -481,15 +482,17 @@
       return result;
     }
 
+    final private static PathFilter DEFAULT_FILTER = new PathFilter() {
+      public boolean accept(Path file) {
+        return !isChecksumFile(file);
+      }     
+    };
+  
     /** List files in a directory. */
     public Path[] listPaths(Path f) throws IOException {
-      return listPaths(f, new PathFilter() {
-          public boolean accept(Path file) {
-            return !isChecksumFile(file);
-          }
-        });
+      return listPaths(f, DEFAULT_FILTER);
     }
-
+    
     /** List files in a directory. */
     public abstract Path[] listPathsRaw(Path f) throws IOException;
 
@@ -509,21 +512,253 @@
       }
       return result;
     }
-
-    /** Filter files in a directory. */
-    public Path[] listPaths(Path f, PathFilter filter) throws IOException {
-        Vector results = new Vector();
-        Path listing[] = listPathsRaw(f);
-        if (listing != null) {
-          for (int i = 0; i < listing.length; i++) {
-            if (filter.accept(listing[i])) {
-              results.add(listing[i]);
-            }
+    
+    /** Filter raw files in a directory. */
+    private void listPaths(ArrayList<Path> results, Path f, PathFilter filter)
+      throws IOException {
+      Path listing[] = listPathsRaw(f);
+      if (listing != null) {
+        for (int i = 0; i < listing.length; i++) {
+          if (filter.accept(listing[i])) {
+            results.add(listing[i]);
           }
         }
+      }      
+    }
+    
+    /** Filter raw files in a directory. */
+    public Path[] listPaths(Path f, PathFilter filter) throws IOException {
+        ArrayList<Path> results = new ArrayList<Path>();
+        listPaths(results, f, filter);
         return (Path[]) results.toArray(new Path[results.size()]);
     }
 
+    /** 
+     * Filter raw files in a list of directories using the default checksum filter.
+     * @param files: a list of paths
+     * @return a list of files under the source paths
+     * @exception IOException
+     */
+    public Path[] listPaths(Path[] files ) throws IOException {
+      return listPaths( files, DEFAULT_FILTER );
+    }
+    
+    /** 
+     * Filter raw files in a list of directories using a user-supplied path filter.
+     * @param files: a list of paths
+     * @return a list of files under the source paths
+     * @exception IOException
+     */
+    public Path[] listPaths(Path[] files, PathFilter filter)
+    throws IOException {
+      ArrayList<Path> results = new ArrayList<Path>();
+      for(int i=0; i<files.length; i++) {
+        listPaths(results, files[i], filter);
+      }
+      return (Path[]) results.toArray(new Path[results.size()]);
+    }
+    
+    /**
+     * <p>Return all the files that match filePattern and are not checksum
+     * files. Results are sorted by their names.
+     * 
+     * <p>
+     * A filename pattern is composed of <i>regular</i> characters and
+     * <i>special pattern matching</i> characters, which are:
+     *
+     * <dl>
+     *  <dd>
+     *   <dl>
+     *    <p>
+     *    <dt> <tt> ? </tt>
+     *    <dd> Matches any single character.
+     *
+     *    <p>
+     *    <dt> <tt> * </tt>
+     *    <dd> Matches zero or more characters.
+     *
+     *    <p>
+     *    <dt> <tt> [<i>abc</i>] </tt>
+     *    <dd> Matches a single character from character set
+     *     <tt>{<i>a,b,c</i>}</tt>.
+     *
+     *    <p>
+     *    <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
+     *    <dd> Matches a single character from the character range
+     *     <tt>{<i>a...b</i>}</tt>.  Note that character <tt><i>a</i></tt> must be
+     *     lexicographically less than or equal to character <tt><i>b</i></tt>.
+     *
+     *    <p>
+     *    <dt> <tt> [^<i>a</i>] </tt>
+     *    <dd> Matches a single character that is not from character set or range
+     *     <tt>{<i>a</i>}</tt>.  Note that the <tt>^</tt> character must occur
+     *     immediately to the right of the opening bracket.
+     *
+     *    <p>
+     *    <dt> <tt> \<i>c</i> </tt>
+     *    <dd> Removes (escapes) any special meaning of character <i>c</i>.
+     *
+     *   </dl>
+     *  </dd>
+     * </dl>
+     *
+     * @param filePattern: a glob pattern specifying the files to match
+
+     * @return an array of paths that match the file pattern
+     * @throws IOException
+     */
+    public Path[] globPaths(Path filePattern) throws IOException {
+      return globPaths(filePattern, DEFAULT_FILTER);
+    }
+    
+    /** Glob all the file names that match filePattern
+     * and are accepted by filter.
+     * @param filePattern: a glob pattern specifying the files to match
+     */
+    public Path[] globPaths(Path filePattern, PathFilter filter) 
+        throws IOException {
+      Path [] parents = new Path[1];
+      int level = 0;
+      
+      String filename = filePattern.toString();
+      if("".equals(filename) || Path.SEPARATOR.equals(filename)) {
+        parents[0] = filePattern;
+        return parents;
+      }
+      
+      String [] components = filename.split(Path.SEPARATOR);
+      if(filePattern.isAbsolute()) {
+        parents[0] = new Path(Path.SEPARATOR);
+        level = 1;
+      } else {
+        parents[0] = new Path( "" );
+      }
+      
+      Path[] results = globPathsLevel(parents, components, level, filter);
+      Arrays.sort(results);
+      return results;
+    }
+    
+    private Path[] globPathsLevel(Path[] parents,
+        String [] filePattern, int level, PathFilter filter) throws IOException {
+      if (level == filePattern.length)
+        return parents;
+      GlobFilter fp = new GlobFilter(filePattern[level], filter);
+      if( fp.hasPattern()) {
+        parents = listPaths(parents, fp);
+      } else {
+        for(int i=0; i<parents.length; i++) {
+          parents[i] = new Path(parents[i], filePattern[level]);
+        }
+      }
+      return globPathsLevel(parents, filePattern, level+1, filter);      
+    }
+ 
+    private static class GlobFilter implements PathFilter {
+      private PathFilter userFilter = DEFAULT_FILTER;
+      private Pattern regex;
+      private boolean hasPattern = false;
+      
+      /** Default pattern character: Escape any special meaning. */
+      private static final char  PAT_ESCAPE =  '\\';
+      /** Default pattern character: Any single character. */
+      private static final char  PAT_ANY = '.';
+      /** Default pattern character: Character set close. */
+      private static final char  PAT_SET_CLOSE = ']';
+      
+      GlobFilter() {
+      }
+      
+      GlobFilter(String filePattern) throws IOException {
+        setRegex(filePattern);
+      }
+      
+      GlobFilter(String filePattern, PathFilter filter) throws IOException {
+        userFilter = filter;
+        setRegex(filePattern);
+      }
+      
+      void setRegex(String filePattern) throws IOException {
+        int len;
+        int setOpen;
+        boolean setRange;
+        StringBuffer fileRegex = new StringBuffer();
+
+        // Validate the pattern
+        len = filePattern.length();
+        if (len == 0)
+            return;
+
+        setOpen =  0;
+        setRange = false;
+
+        for (int i = 0;  i < len;  i++)
+        {
+            char  pCh;
+
+            // Examine a single pattern character
+            pCh = filePattern.charAt(i);            
+            if( pCh == PAT_ESCAPE ) {
+              fileRegex.append( pCh );
+              i++;
+              if (i >= len)
+                  error( "An escaped character does not present",
+                      filePattern, i);
+              pCh = filePattern.charAt(i);
+            } else if( pCh == '.' ) {
+              fileRegex.append( PAT_ESCAPE );
+            } else if( pCh == '*' ) {
+                fileRegex.append( PAT_ANY );
+                hasPattern = true;
+            } else if( pCh == '?' ) {
+                pCh = PAT_ANY ;
+                hasPattern = true;
+            } else if( pCh == '[' && setOpen == 0 ) {
+                setOpen++;
+                hasPattern = true;
+            } else if( pCh == '^' && setOpen > 0) {
+            } else if (pCh == '-'  &&  setOpen > 0) {
+                // Character set range
+                setRange = true;
+            } else if (pCh == PAT_SET_CLOSE  &&  setRange) {
+                // Incomplete character set range
+                error("Incomplete character set range", filePattern, i);
+            } else if (pCh == PAT_SET_CLOSE  &&  setOpen > 0) {
+                // End of a character set
+                if (setOpen < 2)
+                    error("Unexpected end of set", filePattern, i);
+                setOpen = 0;
+            } else if (setOpen > 0) {
+                // Normal character, or the end of a character set range
+                setOpen++;
+                setRange = false;
+            }
+            fileRegex.append( pCh );
+        }
+
+        // Check for a well-formed pattern
+        if (setOpen > 0  ||  setRange)
+        {
+            // Incomplete character set or character range
+            error("Expecting set closure character or end of range", filePattern, len);
+        }
+        regex = Pattern.compile(fileRegex.toString());
+      }
+      
+      boolean hasPattern() {
+        return hasPattern;
+      }
+      
+      public boolean accept(Path path) {
+        return regex.matcher(path.getName()).matches() && userFilter.accept(path);
+      }
+      
+      private void error(String s, String pattern, int pos) throws IOException {
+        throw new IOException("Illegal file pattern: "
+                                 +s+" for glob "+pattern + " at " + pos);
+      }
+    }
+    
     /**
      * Set the current working directory for the given file system.
      * All relative paths will be resolved relative to it.

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java?view=diff&rev=450208&r1=450207&r2=450208
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java Tue Sep 26 14:13:34 2006
@@ -218,15 +218,26 @@
     }
 
     public Path[] listPathsRaw(Path f) throws IOException {
-        String[] names = pathToFile(f).list();
-        if (names == null) {
+        File localf = pathToFile(f);
+        Path[] results;
+
+        if(!localf.exists())
           return null;
+        else if(localf.isFile()) {
+          results = new Path[1];
+          results[0] = f;
+          return results;
+        } else { //directory
+          String[] names = localf.list();
+          if (names == null) {
+            return null;
+          }
+          results = new Path[names.length];
+          for (int i = 0; i < names.length; i++) {
+            results[i] = new Path(f, names[i]);
+          }
+          return results;
         }
-        Path[] results = new Path[names.length];
-        for (int i = 0; i < names.length; i++) {
-          results[i] = new Path(f, names[i]);
-        }
-        return results;
     }
     
     /**

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?view=auto&rev=450208
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Tue Sep 26 14:13:34 2006
@@ -0,0 +1,230 @@
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.dfs.MiniDFSCluster;
+
+import junit.framework.TestCase;
+
+public class TestGlobPaths extends TestCase {
+  
+  static private MiniDFSCluster dfsCluster;
+  static private FileSystem fs;
+  static final private int NUM_OF_PATHS = 4;
+  static final String USER_DIR = "/user/"+System.getProperty("user.name");
+  private Path[] path = new Path[NUM_OF_PATHS];
+  
+  protected void setUp() throws Exception {
+    try {
+      Configuration conf = new Configuration();
+      dfsCluster = new MiniDFSCluster(8889, conf, true);
+      fs = FileSystem.get(conf);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+  
+  protected void tearDown() throws Exception {
+    dfsCluster.shutdown();
+  }
+  
+  public void testGlob() {
+    try {
+      pTestLiteral();
+      pTestAny();
+      pTestClosure();
+      pTestSet();
+      pTestRange();
+      pTestSetExcl();
+      pTestCombination();
+      pTestRelativePath();
+    } catch( IOException e) {
+      e.printStackTrace();
+    } 
+  }
+  
+  private void pTestLiteral() throws IOException {
+    try {
+      String [] files = new String[2];
+      files[0] = USER_DIR+"/a2c";
+      files[1] = USER_DIR+"/ab\\[c.d";
+      Path[] matchedPath = prepareTesting( USER_DIR+"/ab\\[c.d", files );
+      assertEquals( matchedPath.length, 1 );
+      assertEquals( matchedPath[0], path[1] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestAny() throws IOException {
+    try {
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/abc";
+      files[1] = USER_DIR+"/a2c";
+      files[2] = USER_DIR+"/a.c";
+      files[3] = USER_DIR+"/abcd";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a?c", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], path[2] );
+      assertEquals( matchedPath[1], path[1] );
+      assertEquals( matchedPath[2], path[0] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestClosure() throws IOException {
+    pTestClosure1();
+    pTestClosure2();
+    pTestClosure3();
+  }
+  
+  private void pTestClosure1() throws IOException {
+    try {
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/a";
+      files[1] = USER_DIR+"/abc";
+      files[2] = USER_DIR+"/abc.p";
+      files[3] = USER_DIR+"/bacd";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a*", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], path[0] );
+      assertEquals( matchedPath[1], path[1] );
+      assertEquals( matchedPath[2], path[2] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestClosure2() throws IOException {
+    try {
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/a.";
+      files[1] = USER_DIR+"/a.txt";
+      files[2] = USER_DIR+"/a.old.java";
+      files[3] = USER_DIR+"/.java";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a.*", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], path[0] );
+      assertEquals( matchedPath[1], path[2] );
+      assertEquals( matchedPath[2], path[1] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestClosure3() throws IOException {
+    try {    
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/a.txt.x";
+      files[1] = USER_DIR+"/ax";
+      files[2] = USER_DIR+"/ab37x";
+      files[3] = USER_DIR+"/bacd";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a*x", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], path[0] );
+      assertEquals( matchedPath[1], path[2] );
+      assertEquals( matchedPath[2], path[1] );
+    } finally {
+      cleanupDFS();
+    } 
+  }
+  
+  private void pTestSet() throws IOException {
+    try {    
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/a.c";
+      files[1] = USER_DIR+"/a.cpp";
+      files[2] = USER_DIR+"/a.hlp";
+      files[3] = USER_DIR+"/a.hxy";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a.[ch]??", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], path[1] );
+      assertEquals( matchedPath[1], path[2] );
+      assertEquals( matchedPath[2], path[3] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestRange() throws IOException {
+    try {    
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/a.d";
+      files[1] = USER_DIR+"/a.e";
+      files[2] = USER_DIR+"/a.f";
+      files[3] = USER_DIR+"/a.h";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a.[d-fm]", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], path[0] );
+      assertEquals( matchedPath[1], path[1] );
+      assertEquals( matchedPath[2], path[2] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestSetExcl() throws IOException {
+    try {    
+      String [] files = new String[4];
+      files[0] = USER_DIR+"/a.d";
+      files[1] = USER_DIR+"/a.e";
+      files[2] = USER_DIR+"/a.0";
+      files[3] = USER_DIR+"/a.h";
+      Path[] matchedPath = prepareTesting(USER_DIR+"/a.[^a-cg-z0-9]", files);
+      assertEquals( matchedPath.length, 2 );
+      assertEquals( matchedPath[0], path[0] );
+      assertEquals( matchedPath[1], path[1] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+
+  private void pTestCombination() throws IOException {
+    try {    
+      String [] files = new String[4];
+      files[0] = "/user/aa/a.c";
+      files[1] = "/user/bb/a.cpp";
+      files[2] = "/user1/cc/b.hlp";
+      files[3] = "/user/dd/a.hxy";
+      Path[] matchedPath = prepareTesting("/use?/*/a.[ch]??", files);
+      assertEquals( matchedPath.length, 2 );
+      assertEquals( matchedPath[0], path[1] );
+      assertEquals( matchedPath[1], path[3] );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private void pTestRelativePath() throws IOException {
+    try {
+      String [] files = new String[4];
+      files[0] = "a";
+      files[1] = "abc";
+      files[2] = "abc.p";
+      files[3] = "bacd";
+      Path[] matchedPath = prepareTesting("a*", files);
+      assertEquals( matchedPath.length, 3 );
+      assertEquals( matchedPath[0], new Path(USER_DIR, path[0]) );
+      assertEquals( matchedPath[1], new Path(USER_DIR, path[1]) );
+      assertEquals( matchedPath[2], new Path(USER_DIR, path[2]) );
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  private Path[] prepareTesting( String pattern, String[] files)
+  throws IOException {
+    for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
+      path[i] = new Path( files[i] );
+      fs.mkdirs( path[i] );
+    }
+    return fs.globPaths( new Path(pattern) );
+  }
+  
+  private void cleanupDFS( ) throws IOException {
+    fs.delete( new Path("/user"));
+  }
+  
+}