You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2009/05/22 02:29:08 UTC
svn commit: r777334 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
test/org/apache/pig/test/TestLoad.java
test/org/apache/pig/test/TestTypeChecking.java
Author: olga
Date: Fri May 22 00:29:08 2009
New Revision: 777334
URL: http://svn.apache.org/viewvc?rev=777334&view=rev
Log:
PIG-811: Globs with ? in the pattern are broken in local mode
(hagleitn via olgan)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri May 22 00:29:08 2009
@@ -48,6 +48,9 @@
BUG FIXES
+PIG-811: Globs with "?" in the pattern are broken in local mode (hagleitn via
+olgan)
+
PIG-810: Fixed NPE in PigStats (gates)
PIG-804: problem with lineage with double map redirection (pradeepkth)
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri May 22 00:29:08 2009
@@ -175,50 +175,58 @@
String massageFilename(String filename, PigContext pigContext, boolean isLoad) throws IOException, ParseException {
- if (filename.startsWith(FileLocalizer.LOCAL_PREFIX)&&isLoad) {
- filename = FileLocalizer.hadoopify(filename, pigContext);
- }
+
+ // If multiquery is off we revert to the old behavior, which
+ // did not try to convert paths to their absolute location.
boolean isMultiQuery = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("opt.multiquery","true"));
if (!isMultiQuery) {
- if(!isLoad) {
+ if (!isLoad) { // stores do not require any change
return filename;
}
+
+ // Local loads in the hadoop context require copying the
+ // file to dfs first.
+ if (pigContext.getExecType() != ExecType.LOCAL
+ && filename.startsWith(FileLocalizer.LOCAL_PREFIX)) {
+ filename = FileLocalizer.hadoopify(filename, pigContext);
+ }
return filename;
}
String fname;
+ // If we converted the file name before, we return the old
+ // result. This is not done for performance but to make sure
+ // we return the same result for relative paths when
+ // re-parsing the same script for execution.
if (null != (fname = fileNameMap.get(filename))) {
return fname;
} else {
fname = filename;
}
- URI uri = null;
- try {
- uri = new URI(fname);
- } catch (URISyntaxException ue) {
- log.info(fname + " is not a valid URI.");
- return fname;
- }
- String scheme = uri.getScheme();
- if (scheme != null) {
- scheme = scheme.toLowerCase();
- }
-
- String path = uri.getPath();
- if (path == null) {
- // file:foo is not a valid uri. So the path ends up in the scheme specific part...
- path = uri.getSchemeSpecificPart();
- }
+ String scheme, path;
- if ((scheme == null && pigContext.getExecType() == ExecType.MAPREDUCE) ||
- "hdfs".equalsIgnoreCase(scheme)) {
- // We need to get the path from a hadoop path object,
- // otherwise special glob characters could get removed.
- path = new Path(fname).toUri().getPath();
+ if (fname.startsWith(FileLocalizer.LOCAL_PREFIX)) {
+ // We don't use hadoop path class to do the parsing,
+ // because our syntax of saying 'file:foo' for relative
+ // paths on the local FS is not a valid URL.
+ scheme = "file";
+ path = fname.substring(FileLocalizer.LOCAL_PREFIX.length());
+ } else {
+ // Path implements a custom uri parsing that takes care of
+ // unescaped characters (think globs). Using "new
+ // URI(fname)" would break.
+ URI uri = new Path(fname).toUri();
+
+ scheme = uri.getScheme();
+ if (scheme != null) {
+ scheme = scheme.toLowerCase();
+ }
+
+ path = uri.getPath();
}
-
+
if (scheme == null || scheme.equals("file") || scheme.equals("hdfs")) {
if (pigContext.getExecType() != ExecType.LOCAL) {
if (fname.startsWith(FileLocalizer.LOCAL_PREFIX)) {
@@ -233,6 +241,7 @@
ElementDescriptor el = dfs.asElement(desc, path);
fname = el.toString();
}
+
if (!fname.equals(filename)) {
fileNameMap.put(filename, fname);
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestLoad.java Fri May 22 00:29:08 2009
@@ -158,6 +158,11 @@
checkLoadPath("/tmp/foo/../././","/tmp");
}
+ @Test
+ public void testGlobChars() throws Exception {
+ checkLoadPath("t?s*","/tmp/t?s*");
+ }
+
private void checkLoadPath(String orig, String expected) throws Exception {
checkLoadPath(orig, expected, false);
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java?rev=777334&r1=777333&r2=777334&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeChecking.java Fri May 22 00:29:08 2009
@@ -244,7 +244,7 @@
public void testSUM1() throws Throwable {
TypeCheckingTestUtil.printCurrentMethodName() ;
- planTester.buildPlan("a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);") ;
+ planTester.buildPlan("a = load '/user/pig/tests/data/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);") ;
LogicalPlan plan1 = planTester.buildPlan("b = foreach a generate (long)age as age:long, (int)gpa as gpa:int;") ;
LogicalPlan plan2 = planTester.buildPlan("c = foreach b generate SUM(age), SUM(gpa);") ;
planTester.typeCheckPlan(plan2);