You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2009/05/22 02:29:08 UTC

svn commit: r777334 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt test/org/apache/pig/test/TestLoad.java test/org/apache/pig/test/TestTypeChecking.java

Author: olga
Date: Fri May 22 00:29:08 2009
New Revision: 777334

URL: http://svn.apache.org/viewvc?rev=777334&view=rev
Log:
PIG-811: Globs with ? in the pattern are broken in local mode
(hagleitn via olgan)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri May 22 00:29:08 2009
@@ -48,6 +48,9 @@
 
 BUG FIXES
 
+PIG-811: Globs with "?" in the pattern are broken in local mode (hagleitn via
+olgan)
+
 PIG-810: Fixed NPE in PigStats (gates)
 
 PIG-804: problem with lineage with double map redirection (pradeepkth)

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri May 22 00:29:08 2009
@@ -175,50 +175,58 @@
 	
 
     String massageFilename(String filename, PigContext pigContext, boolean isLoad) throws IOException, ParseException {
-        if (filename.startsWith(FileLocalizer.LOCAL_PREFIX)&&isLoad) {
-            filename = FileLocalizer.hadoopify(filename, pigContext);
-        } 
+
+        // If multiquery is off we revert to the old behavior, which
+        // did not try to convert paths to their absolute location.
     	boolean isMultiQuery = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("opt.multiquery","true"));
         if (!isMultiQuery) {
-            if(!isLoad) {
+            if (!isLoad) { // stores do not require any change
                 return filename;
             }
+
+            // Local loads in the hadoop context require copying the
+            // file to dfs first.
+            if (pigContext.getExecType() != ExecType.LOCAL 
+                && filename.startsWith(FileLocalizer.LOCAL_PREFIX)) {
+                    filename = FileLocalizer.hadoopify(filename, pigContext);
+            }
             return filename;
         }
 
         String fname;
 
+        // If we converted the file name before, we return the old
+        // result. This is not done for performance but to make sure
+        // we return the same result for relative paths when
+        // re-parsing the same script for execution.
         if (null != (fname = fileNameMap.get(filename))) {
             return fname;
         } else {
             fname = filename;
         }
 
-        URI uri = null;
-        try {
-            uri = new URI(fname);
-        } catch (URISyntaxException ue) {
-            log.info(fname + " is not a valid URI.");
-            return fname;
-        }
-        String scheme = uri.getScheme();
-        if (scheme != null) {
-            scheme = scheme.toLowerCase();
-        }
-        
-        String path = uri.getPath();
-        if (path == null) {
-            // file:foo is not a valid uri. So the path ends up in the scheme specific part...
-            path = uri.getSchemeSpecificPart();
-        }
+        String scheme, path;
 
-        if ((scheme == null && pigContext.getExecType() == ExecType.MAPREDUCE) ||
-            "hdfs".equalsIgnoreCase(scheme)) {
-            // We need to get the path from a hadoop path object,
-            // otherwise special glob characters could get removed.
-            path = new Path(fname).toUri().getPath();
+        if (fname.startsWith(FileLocalizer.LOCAL_PREFIX)) {
+            // We don't use hadoop path class to do the parsing,
+            // because our syntax of saying 'file:foo' for relative
+            // paths on the local FS is not a valid URL.
+            scheme = "file";
+            path = fname.substring(FileLocalizer.LOCAL_PREFIX.length());
+        } else {
+            // Path implements a custom uri parsing that takes care of
+            // unescaped characters (think globs). Using "new
+            // URI(fname)" would break.
+            URI uri = new Path(fname).toUri();
+            
+            scheme = uri.getScheme();
+            if (scheme != null) {
+                scheme = scheme.toLowerCase();
+            }
+            
+            path = uri.getPath();
         }
-
+        
         if (scheme == null || scheme.equals("file") || scheme.equals("hdfs")) {
             if (pigContext.getExecType() != ExecType.LOCAL) {
                 if (fname.startsWith(FileLocalizer.LOCAL_PREFIX)) {
@@ -233,6 +241,7 @@
             ElementDescriptor el = dfs.asElement(desc, path);
             fname = el.toString();
         }
+
         if (!fname.equals(filename)) {
             fileNameMap.put(filename, fname);
         }

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java Fri May 22 00:29:08 2009
@@ -158,6 +158,11 @@
         checkLoadPath("/tmp/foo/../././","/tmp");
     }
 
+    @Test
+    public void testGlobChars() throws Exception {
+        checkLoadPath("t?s*","/tmp/t?s*");
+    }
+
     private void checkLoadPath(String orig, String expected) throws Exception {
         checkLoadPath(orig, expected, false);
     }

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java Fri May 22 00:29:08 2009
@@ -244,7 +244,7 @@
 
     public void testSUM1() throws Throwable {
         TypeCheckingTestUtil.printCurrentMethodName() ;
-        planTester.buildPlan("a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);") ;
+        planTester.buildPlan("a = load '/user/pig/tests/data/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);") ;
         LogicalPlan plan1 = planTester.buildPlan("b = foreach a generate (long)age as age:long, (int)gpa as gpa:int;") ;
         LogicalPlan plan2 = planTester.buildPlan("c = foreach b generate SUM(age), SUM(gpa);") ;
         planTester.typeCheckPlan(plan2);