You are viewing a plain-text version of this content. The hyperlink to the canonical version was not preserved in this copy.
Posted to commits@pig.apache.org by da...@apache.org on 2011/01/21 22:58:18 UTC

svn commit: r1062042 - in /pig/trunk: ./ src/org/apache/pig/impl/logicalLayer/parser/ src/org/apache/pig/parser/ test/org/apache/pig/parser/ test/org/apache/pig/test/

Author: daijy
Date: Fri Jan 21 21:58:17 2011
New Revision: 1062042

URL: http://svn.apache.org/viewvc?rev=1062042&view=rev
Log:
PIG-1749: Update Pig parser so that function arguments can contain newline characters

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    pig/trunk/src/org/apache/pig/parser/QueryLexer.g
    pig/trunk/src/org/apache/pig/parser/QueryParser.g
    pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
    pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Jan 21 21:58:17 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-1749: Update Pig parser so that function arguments can contain newline characters (jghoman via daijy)
+
 PIG-1806: Modify embedded Pig API for usability (rding)
 
 PIG-1799: Provide deployable maven artifacts for pigunit and pig smoke tests

Modified: pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri Jan 21 21:58:17 2011
@@ -966,6 +966,23 @@ TOKEN : { <QUOTEDSTRING :  "'"
 )*
 "'"> }
 
+
+TOKEN:{ < QUOTED_MULTI_STRING :  
+"'" (  (~["'","\\","\n","\r"])
+       | ("\\"  ["n","t","b","r","f","\\","'"] )
+       | ("\\u" ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"]
+                ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"])
+    )* 
+    ["\n","\r"] 
+    (  (~["'","\\"] )
+       | ("\\"  ["n","t","b","r","f","\\","'"] )
+       | ("\\u" ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"]
+                ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"])
+    )*
+"'"> 
+}
+
+
 TOKEN : { <EXECCOMMAND : "`" (~["`"])* "`"> }
 // Pig has special variables starting with $
 TOKEN : { <DOLLARVAR : "$" <INTEGER> > }
@@ -1397,6 +1414,33 @@ String StringList() : 
 	{log.debug("StringList: " + sb.toString()); return sb.toString();}
 }
 
+String FunctionArgs() : 
+{
+       StringBuilder sb = new StringBuilder(); 
+       Token t;
+}
+{
+(
+       (
+         ( ( t = <QUOTED_MULTI_STRING> {sb.append(StringUtils.unescapeInputString(t.image));} )
+           | 
+           ( t = <QUOTEDSTRING> {sb.append(StringUtils.unescapeInputString(t.image));})
+         )
+         ( "," 
+           (
+            (t = <QUOTED_MULTI_STRING> {sb.append(",");sb.append(StringUtils.unescapeInputString(t.image));} ) 
+            | 
+            (t = <QUOTEDSTRING> {sb.append(",");sb.append(StringUtils.unescapeInputString(t.image));} ) 
+           ) 
+         )*
+       )
+       | {}
+)
+       {log.debug("FuncArgs: " + sb.toString()); return sb.toString();}
+}
+
+
+
 //B = native ('mymr.jar' [, 'other.jar' ...]) A store into 'storeLocation' using storeFunc load 'loadLocation' using loadFunc ['params'];
 LogicalOperator MapReduceClause(LogicalPlan lp) : 
 {
@@ -3372,7 +3416,7 @@ FuncSpec NonEvalFuncSpec(byte funcType) 
 		}
     }
     )
-|   functionName = QualifiedFunction() ( "(" functionArgs = StringList() ")" )?
+|   functionName = QualifiedFunction() ( "(" functionArgs = FunctionArgs() ")" )?
     )
 	{
 		if(null != func) {

Modified: pig/trunk/src/org/apache/pig/parser/QueryLexer.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryLexer.g?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryLexer.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryLexer.g Fri Jan 21 21:58:17 2011
@@ -294,6 +294,16 @@ QUOTEDSTRING :  '\'' (   ( ~ ( '\'' | '\
                 '\''
 ;
 
+MULTILINE_QUOTEDSTRING :  '\'' (   ( ~ ( '\'' | '\\' ) )
+                                 | ( '\\' ( ( 'N' | 'T' | 'B' | 'R' | 'F' | '\\' | '\'' | 'n' | 'r' ) ) )
+                                 | ( '\\u' ( '0'..'9' | 'A'..'F' )
+                                           ( '0'..'9' | 'A'..'F' )
+                                           ( '0'..'9' | 'A'..'F' )
+                                           ( '0'..'9' | 'A'..'F' )  )
+                               )*
+                '\''
+;
+
 EXECCOMMAND : '`' ( ~( '`' ) )* '`'
 ;
     

Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Fri Jan 21 21:58:17 2011
@@ -233,8 +233,11 @@ func_name : eid ( ( PERIOD | DOLLAR ) ei
 func_alias : IDENTIFIER
 ;
 
-func_args : QUOTEDSTRING ( COMMA QUOTEDSTRING )*
-         -> QUOTEDSTRING+
+func_args_string : QUOTEDSTRING | MULTILINE_QUOTEDSTRING
+;
+
+func_args : func_args_string ( COMMA func_args_string )*
+         -> func_args_string+
 ;
 
 group_clause : ( GROUP | COGROUP )^ group_item_list ( USING! group_type )?

Modified: pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestQueryParser.java?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestQueryParser.java (original)
+++ pig/trunk/test/org/apache/pig/parser/TestQueryParser.java Fri Jan 21 21:58:17 2011
@@ -50,51 +50,42 @@ public class TestQueryParser {
 
     @Test
     public void testNegative1() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load 'x'; B=A;" );
-        Assert.assertTrue( errorCount > 0 );
+        shouldFail("A = load 'x'; B=A;");
     }
     
     @Test
     public void testNegative2() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load 'x'; B=(A);" );
-        Assert.assertTrue( errorCount > 0 );
+        shouldFail("A = load 'x'; B=(A);");
     }
 
     @Test
     public void testNegative3() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load 'x';B = (A) as (a:int, b:long);" );
-        Assert.assertTrue( errorCount > 0 );
+        shouldFail("A = load 'x';B = (A) as (a:int, b:long);");
     }
 
     @Test
     public void testNegative4() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load 'x'; B = ( filter A by $0 == 0 ) as (a:bytearray, b:long);" );
-        Assert.assertTrue( errorCount > 0 );
+        shouldFail("A = load 'x'; B = ( filter A by $0 == 0 ) as (a:bytearray, b:long);");
     }
     
     @Test
     public void testNegative5() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load 'x'; D = group A by $0:long;" );
-        Assert.assertTrue( errorCount > 0 );
+        shouldFail("A = load 'x'; D = group A by $0:long;");
     }
     
     @Test
     public void testNegative6() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate $0, 3.0e10.1;" );
-        Assert.assertTrue( errorCount > 0 );
+        shouldFail("A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate $0, 3.0e10.1;");
     }
     
     @Test
     public void test2() throws IOException, RecognitionException {
-        int errorCount = parse( "A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate ( $0 == 0 ? 1 : 0 );" );
-        Assert.assertTrue( errorCount == 0 );
+        shouldPass("A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate ( $0 == 0 ? 1 : 0 );");
     }
 
     @Test
     public void test3() throws IOException, RecognitionException {
-        int errorCount = parse( 
- "a = load '1.txt' as (a0); b = foreach a generate flatten((bag{T:tuple(m:map[])})a0) as b0:map[];c = foreach b generate (long)b0#'key1';" );
-        Assert.assertTrue( errorCount == 0 );
+        shouldPass("a = load '1.txt' as (a0); b = foreach a generate flatten((bag{T:tuple(m:map[])})a0) as b0:map[];c = foreach b generate (long)b0#'key1';");
     }
 
     @Test
@@ -157,6 +148,44 @@ public class TestQueryParser {
         stmt = ast.getChild( 4 );
         Assert.assertTrue( "STORE".equalsIgnoreCase( stmt.getChild( 0 ).getText() ) );
     }
+
+    @Test
+    public void testMultilineFunctionArguments() throws RecognitionException, IOException {
+        final String pre = "STORE data INTO 'testOut' \n" +
+                           "USING PigStorage (\n";
+
+        String lotsOfNewLines = "'{\"debug\": 5,\n" +
+                                "  \"data\": \"/user/lguo/testOut/ComponentActTracking4/part-m-00000.avro\",\n" +
+                                "  \"field0\": \"int\",\n" +
+                                "  \"field1\": \"def:browser_id\",\n" +
+                                "  \"field3\": \"def:act_content\" }\n '\n";
+
+        String [] queries = { lotsOfNewLines,
+                            "'notsplitatall'",
+                            "'see you\nnext line'",
+                            "'surrounded \n by spaces'",
+                            "'\nleading newline'",
+                            "'trailing newline\n'",
+                            "'\n'",
+                            "'repeated\n\n\n\n\n\n\n\n\nnewlines'",
+                            "'also\ris\rsupported\r'"};
+
+        final String post = ");";
+
+        for(String q : queries) {
+            shouldPass(pre + q + post);
+        }
+    }
+
+    private void shouldPass(String query) throws RecognitionException, IOException {
+        System.out.println("Testing: " + query);
+        Assert.assertEquals(query + " should have passed", 0, parse(query));
+    }
+
+    private void shouldFail(String query) throws RecognitionException, IOException {
+        System.out.println("Testing: " + query);
+        Assert.assertFalse(query + " should have failed", 0 == parse(query));
+    }
     
     private int parse(String query) throws IOException, RecognitionException  {
         CharStream input = new QueryParserStringStream( query );
@@ -169,8 +198,8 @@ public class TestQueryParser {
         Tree ast = (Tree)result.getTree();
 
         System.out.println( ast.toStringTree() );
-        TreePrinter.printTree( (CommonTree)ast, 0 );
-        Assert.assertEquals( 0, lexer.getNumberOfSyntaxErrors() );
+        TreePrinter.printTree((CommonTree) ast, 0);
+        Assert.assertEquals(0, lexer.getNumberOfSyntaxErrors());
         return parser.getNumberOfSyntaxErrors();
     }
 

Modified: pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java Fri Jan 21 21:58:17 2011
@@ -24,6 +24,7 @@ import org.junit.Test;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
+import java.io.ByteArrayInputStream;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.InputStream;
@@ -1383,4 +1384,102 @@ public class TestParamSubPreproc extends
         }
 
     }
+    
+    /* Test case:
+     *  Use parameters inside a Pig function argument that contains newline characters,
+     *  and provide values for them.
+     */
+    @Test 
+    public void testSubstitutionInFuncArgs() throws Exception{
+        log.info("Starting test testSubstitutionInFuncArgs()");
+        final String queryString = 
+    "  avro = LOAD '/data/part-m-00000.avro' USING PigStorage ();\n" +
+    "   avro2 = FOREACH avro GENERATE  browser_id, component_version, " +
+                                   "member_id, page_key, session_id, tracking_time, type;\n" +
+
+    "    fs -rmr testOut/out1;\n" +
+    "    STORE avro2 INTO 'testOut/out2'\n" +
+    "    USING PigStorage (\n" +
+    "    ' {\n" +
+    "   \"debug\": $debug,\n" +
+    "    \"schema\":\n" +
+    "        { \"type\":\"record\",\"name\":\"$name\",   \n" +
+    "          \"fields\": [ {\"name\":\"browser_id\", \"type\":[\"null\",\"string\"]},  \n" +
+    "                      {\"name\":\"component_version\",\"type\":\"int\"},\n" +
+    "                      {\"name\":\"member_id\",\"type\":\"int\"},\n" + 
+    "                      {\"name\":\"page_key\",\"type\":[\"null\",\"string\"]},\n" + 
+    "                      {\"name\":\"session_id\",\"type\":\"long\"},\n" + 
+    "                      {\"name\":\"tracking_time\",\"type\":\"long\"},\n" + 
+    "                      {\"name\":\"type\",\"type\":[\"null\",\"string\"]}\n" + 
+    "                   ]\n" +
+    "        }\n" +
+    "    }\n"+
+    "    ');";
+
+        final String expectedString = 
+            "  avro = LOAD '/data/part-m-00000.avro' USING PigStorage ();\n" +
+            "   avro2 = FOREACH avro GENERATE  browser_id, component_version, " +
+                                           "member_id, page_key, session_id, tracking_time, type;\n" +
+
+            "    fs -rmr testOut/out1;\n" +
+            "    STORE avro2 INTO 'testOut/out2'\n" +
+            "    USING PigStorage (\n" +
+            "    ' {\n" +
+            "   \"debug\": 5,\n" +
+            "    \"schema\":\n" +
+            "        { \"type\":\"record\",\"name\":\"TestRecord\",   \n" +
+            "          \"fields\": [ {\"name\":\"browser_id\", \"type\":[\"null\",\"string\"]},  \n" +
+            "                      {\"name\":\"component_version\",\"type\":\"int\"},\n" +
+            "                      {\"name\":\"member_id\",\"type\":\"int\"},\n" + 
+            "                      {\"name\":\"page_key\",\"type\":[\"null\",\"string\"]},\n" + 
+            "                      {\"name\":\"session_id\",\"type\":\"long\"},\n" + 
+            "                      {\"name\":\"tracking_time\",\"type\":\"long\"},\n" + 
+            "                      {\"name\":\"type\",\"type\":[\"null\",\"string\"]}\n" + 
+            "                   ]\n" +
+            "        }\n" +
+            "    }\n"+
+            "    ');";
+        try {
+            ParameterSubstitutionPreprocessor ps = new ParameterSubstitutionPreprocessor(50);
+            pigIStream = new BufferedReader(
+                                            new InputStreamReader(new ByteArrayInputStream(queryString.getBytes("UTF-8"))));
+            pigOStream = new FileWriter(basedir + "/output26.pig");
+
+            String[] arg = {"debug = '5'", "name = 'TestRecord'"}; 
+            String[] argFiles = null;
+            ps.genSubstitutedFile(pigIStream , pigOStream , arg , argFiles);
+
+            FileInputStream pigResultStream = new FileInputStream(basedir + "/output26.pig");
+            InputStream expected = new ByteArrayInputStream(expectedString.getBytes("UTF-8"));
+            BufferedReader inExpected = new BufferedReader(new InputStreamReader(expected));
+            BufferedReader inResult = new BufferedReader(new InputStreamReader(pigResultStream));
+
+            String exLine;
+            String resLine;
+            int lineNum=0;
+
+            while (true) {
+                lineNum++;
+                exLine = inExpected.readLine();
+                resLine = inResult.readLine();
+                if (exLine==null || resLine==null)
+                    break;
+                assertEquals("Parameter substitution with shell command failed. " + "Expected : "+exLine+" , but got : "+resLine+" in line num : "+lineNum ,exLine.trim(), resLine.trim());
+            }
+            if (!(exLine==null && resLine==null)) {
+                fail ("Parameter substitution with shell command failed. " + "Expected : "+exLine+" , but got : "+resLine+" in line num : "+lineNum);
+            }
+
+            inExpected.close();
+            inResult.close();
+        } catch (ParseException e) {
+            fail ("Got ParseException : " + e.getMessage());
+        } catch (RuntimeException e) {
+            fail ("Got RuntimeException : " + e.getMessage());
+        } catch (Error e) {
+            fail ("Got error : " + e.getMessage());
+        }
+        log.info("Done");
+    }
+
 }