You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2011/01/21 22:58:18 UTC
svn commit: r1062042 - in /pig/trunk: ./
src/org/apache/pig/impl/logicalLayer/parser/ src/org/apache/pig/parser/
test/org/apache/pig/parser/ test/org/apache/pig/test/
Author: daijy
Date: Fri Jan 21 21:58:17 2011
New Revision: 1062042
URL: http://svn.apache.org/viewvc?rev=1062042&view=rev
Log:
PIG-1749: Update Pig parser so that function arguments can contain newline characters
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
pig/trunk/src/org/apache/pig/parser/QueryLexer.g
pig/trunk/src/org/apache/pig/parser/QueryParser.g
pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Jan 21 21:58:17 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-1749: Update Pig parser so that function arguments can contain newline characters (jghoman via daijy)
+
PIG-1806: Modify embedded Pig API for usability (rding)
PIG-1799: Provide deployable maven artifacts for pigunit and pig smoke tests
Modified: pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri Jan 21 21:58:17 2011
@@ -966,6 +966,23 @@ TOKEN : { <QUOTEDSTRING : "'"
)*
"'"> }
+
+TOKEN:{ < QUOTED_MULTI_STRING : // single-quoted literal that contains at least one raw line break
+"'" ( (~["'","\\","\n","\r"]) // opening quote, then first segment: any char except quote, backslash, LF, CR
+ | ("\\" ["n","t","b","r","f","\\","'"] ) // ... or a backslash escape: \n \t \b \r \f \\ \'
+ | ("\\u" ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"] // ... or \u followed by four hex digits (either case)
+ ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"])
+ )*
+ ["\n","\r"] // mandatory raw LF or CR: without one this token cannot match
+ ( (~["'","\\"] ) // tail segment: like the first segment, except raw LF/CR are now allowed too
+ | ("\\" ["n","t","b","r","f","\\","'"] ) // same backslash escapes as in the first segment
+ | ("\\u" ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"] // same \uXXXX escape as in the first segment
+ ["0"-"9","A"-"F","a"-"f"]["0"-"9","A"-"F","a"-"f"])
+ )*
+"'"> // closing quote; a bare backslash or unescaped quote can never appear inside the literal
+}
+
+
TOKEN : { <EXECCOMMAND : "`" (~["`"])* "`"> }
// Pig has special variables starting with $
TOKEN : { <DOLLARVAR : "$" <INTEGER> > }
@@ -1397,6 +1414,33 @@ String StringList() :
{log.debug("StringList: " + sb.toString()); return sb.toString();}
}
+String FunctionArgs() : // parses a possibly empty, comma-separated list of quoted-string arguments and returns them joined by ","
+{
+ StringBuilder sb = new StringBuilder(); // accumulates the arguments in source order
+ Token t; // most recently matched string token
+}
+{
+(
+ (
+ ( ( t = <QUOTED_MULTI_STRING> {sb.append(StringUtils.unescapeInputString(t.image));} ) // first argument: a multi-line literal ...
+ |
+ ( t = <QUOTEDSTRING> {sb.append(StringUtils.unescapeInputString(t.image));}) // ... or an ordinary quoted literal; either way the image goes through unescapeInputString
+ )
+ ( "," // every further argument must be preceded by a comma
+ (
+ (t = <QUOTED_MULTI_STRING> {sb.append(",");sb.append(StringUtils.unescapeInputString(t.image));} ) // the comma is re-emitted into sb before the argument
+ |
+ (t = <QUOTEDSTRING> {sb.append(",");sb.append(StringUtils.unescapeInputString(t.image));} )
+ )
+ )*
+ )
+ | {} // empty alternative: no arguments at all, sb stays empty
+)
+ {log.debug("FuncArgs: " + sb.toString()); return sb.toString();} // logs at debug level, then returns the joined string ("" when there were no arguments)
+}
+
+
+
//B = native ('mymr.jar' [, 'other.jar' ...]) A store into 'storeLocation' using storeFunc load 'loadLocation' using loadFunc ['params'];
LogicalOperator MapReduceClause(LogicalPlan lp) :
{
@@ -3372,7 +3416,7 @@ FuncSpec NonEvalFuncSpec(byte funcType)
}
}
)
-| functionName = QualifiedFunction() ( "(" functionArgs = StringList() ")" )?
+| functionName = QualifiedFunction() ( "(" functionArgs = FunctionArgs() ")" )?
)
{
if(null != func) {
Modified: pig/trunk/src/org/apache/pig/parser/QueryLexer.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryLexer.g?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryLexer.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryLexer.g Fri Jan 21 21:58:17 2011
@@ -294,6 +294,16 @@ QUOTEDSTRING : '\'' ( ( ~ ( '\'' | '\
'\''
;
+MULTILINE_QUOTEDSTRING : '\'' ( ( ~ ( '\'' | '\\' ) ) // opening quote, then any char except quote and backslash, so raw line breaks are accepted
+ | ( '\\' ( ( 'N' | 'T' | 'B' | 'R' | 'F' | '\\' | '\'' | 'n' | 'r' ) ) ) // backslash escapes; NOTE(review): only n and r are also listed in lower case - presumably the lexer input stream is case-insensitive, confirm
+ | ( '\\u' ( '0'..'9' | 'A'..'F' ) // \u followed by four hex digits; only upper-case A-F is listed here
+ ( '0'..'9' | 'A'..'F' )
+ ( '0'..'9' | 'A'..'F' )
+ ( '0'..'9' | 'A'..'F' ) )
+ )*
+ '\'' // closing quote; NOTE(review): nothing here requires a line break, so this rule may also match single-line literals - verify precedence against QUOTEDSTRING
+;
+
EXECCOMMAND : '`' ( ~( '`' ) )* '`'
;
Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Fri Jan 21 21:58:17 2011
@@ -233,8 +233,11 @@ func_name : eid ( ( PERIOD | DOLLAR ) ei
func_alias : IDENTIFIER
;
-func_args : QUOTEDSTRING ( COMMA QUOTEDSTRING )*
- -> QUOTEDSTRING+
+func_args_string : QUOTEDSTRING | MULTILINE_QUOTEDSTRING // one function argument: either kind of quoted string token
+;
+
+func_args : func_args_string ( COMMA func_args_string )* // one or more arguments separated by COMMA
+ -> func_args_string+ // rewrite: the tree keeps only the arguments, the COMMA tokens are dropped
;
group_clause : ( GROUP | COGROUP )^ group_item_list ( USING! group_type )?
Modified: pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestQueryParser.java?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestQueryParser.java (original)
+++ pig/trunk/test/org/apache/pig/parser/TestQueryParser.java Fri Jan 21 21:58:17 2011
@@ -50,51 +50,42 @@ public class TestQueryParser {
@Test
public void testNegative1() throws IOException, RecognitionException {
- int errorCount = parse( "A = load 'x'; B=A;" );
- Assert.assertTrue( errorCount > 0 );
+ shouldFail("A = load 'x'; B=A;");
}
@Test
public void testNegative2() throws IOException, RecognitionException {
- int errorCount = parse( "A = load 'x'; B=(A);" );
- Assert.assertTrue( errorCount > 0 );
+ shouldFail("A = load 'x'; B=(A);");
}
@Test
public void testNegative3() throws IOException, RecognitionException {
- int errorCount = parse( "A = load 'x';B = (A) as (a:int, b:long);" );
- Assert.assertTrue( errorCount > 0 );
+ shouldFail("A = load 'x';B = (A) as (a:int, b:long);");
}
@Test
public void testNegative4() throws IOException, RecognitionException {
- int errorCount = parse( "A = load 'x'; B = ( filter A by $0 == 0 ) as (a:bytearray, b:long);" );
- Assert.assertTrue( errorCount > 0 );
+ shouldFail("A = load 'x'; B = ( filter A by $0 == 0 ) as (a:bytearray, b:long);");
}
@Test
public void testNegative5() throws IOException, RecognitionException {
- int errorCount = parse( "A = load 'x'; D = group A by $0:long;" );
- Assert.assertTrue( errorCount > 0 );
+ shouldFail("A = load 'x'; D = group A by $0:long;");
}
@Test
public void testNegative6() throws IOException, RecognitionException {
- int errorCount = parse( "A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate $0, 3.0e10.1;" );
- Assert.assertTrue( errorCount > 0 );
+ shouldFail("A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate $0, 3.0e10.1;");
}
@Test
public void test2() throws IOException, RecognitionException {
- int errorCount = parse( "A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate ( $0 == 0 ? 1 : 0 );" );
- Assert.assertTrue( errorCount == 0 );
+ shouldPass("A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate ( $0 == 0 ? 1 : 0 );");
}
@Test
public void test3() throws IOException, RecognitionException {
- int errorCount = parse(
- "a = load '1.txt' as (a0); b = foreach a generate flatten((bag{T:tuple(m:map[])})a0) as b0:map[];c = foreach b generate (long)b0#'key1';" );
- Assert.assertTrue( errorCount == 0 );
+ shouldPass("a = load '1.txt' as (a0); b = foreach a generate flatten((bag{T:tuple(m:map[])})a0) as b0:map[];c = foreach b generate (long)b0#'key1';");
}
@Test
@@ -157,6 +148,44 @@ public class TestQueryParser {
stmt = ast.getChild( 4 );
Assert.assertTrue( "STORE".equalsIgnoreCase( stmt.getChild( 0 ).getText() ) );
}
+
+ /**
+  * Verifies that a quoted function argument may contain raw line breaks.
+  * Each candidate argument is wrapped in a {@code STORE ... USING PigStorage ( ... );}
+  * statement and must parse without parser syntax errors.
+  */
+ @Test
+ public void testMultilineFunctionArguments() throws RecognitionException, IOException {
+     // Statement text placed before and after every candidate argument.
+     final String statementHead = "STORE data INTO 'testOut' \nUSING PigStorage (\n";
+     final String statementTail = ");";
+
+     // A JSON-like argument spread over several lines, followed by a trailing newline.
+     final String jsonLikeArgument =
+         "'{\"debug\": 5,\n"
+         + " \"data\": \"/user/lguo/testOut/ComponentActTracking4/part-m-00000.avro\",\n"
+         + " \"field0\": \"int\",\n"
+         + " \"field1\": \"def:browser_id\",\n"
+         + " \"field3\": \"def:act_content\" }\n '\n";
+
+     final String[] arguments = {
+         jsonLikeArgument,
+         "'notsplitatall'",
+         "'see you\nnext line'",
+         "'surrounded \n by spaces'",
+         "'\nleading newline'",
+         "'trailing newline\n'",
+         "'\n'",
+         "'repeated\n\n\n\n\n\n\n\n\nnewlines'",
+         "'also\ris\rsupported\r'"
+     };
+
+     for (int i = 0; i < arguments.length; i++) {
+         shouldPass(statementHead + arguments[i] + statementTail);
+     }
+ }
+
+ private void shouldPass(String query) throws RecognitionException, IOException {
+ System.out.println("Testing: " + query);
+ Assert.assertEquals(query + " should have passed", 0, parse(query));
+ }
+
+ private void shouldFail(String query) throws RecognitionException, IOException {
+ System.out.println("Testing: " + query);
+ Assert.assertFalse(query + " should have failed", 0 == parse(query));
+ }
private int parse(String query) throws IOException, RecognitionException {
CharStream input = new QueryParserStringStream( query );
@@ -169,8 +198,8 @@ public class TestQueryParser {
Tree ast = (Tree)result.getTree();
System.out.println( ast.toStringTree() );
- TreePrinter.printTree( (CommonTree)ast, 0 );
- Assert.assertEquals( 0, lexer.getNumberOfSyntaxErrors() );
+ TreePrinter.printTree((CommonTree) ast, 0);
+ Assert.assertEquals(0, lexer.getNumberOfSyntaxErrors());
return parser.getNumberOfSyntaxErrors();
}
Modified: pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java?rev=1062042&r1=1062041&r2=1062042&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java Fri Jan 21 21:58:17 2011
@@ -24,6 +24,7 @@ import org.junit.Test;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
@@ -1383,4 +1384,102 @@ public class TestParamSubPreproc extends
}
}
+
+ /**
+  * Test case: a parameter is used inside a Pig function argument that spans
+  * several lines, and a value is supplied for it.
+  *
+  * The script is run through {@link ParameterSubstitutionPreprocessor} with
+  * {@code debug} and {@code name} defined, the result is written to
+  * {@code output26.pig} under {@code basedir}, and that file is compared line
+  * by line (ignoring leading/trailing whitespace) with the expected text.
+  */
+ @Test
+ public void testSubstitutionInFuncArgs() throws Exception{
+     log.info("Starting test testSubstitutionInFuncArgs()");
+     final String queryString =
+         " avro = LOAD '/data/part-m-00000.avro' USING PigStorage ();\n" +
+         " avro2 = FOREACH avro GENERATE browser_id, component_version, " +
+         "member_id, page_key, session_id, tracking_time, type;\n" +
+
+         " fs -rmr testOut/out1;\n" +
+         " STORE avro2 INTO 'testOut/out2'\n" +
+         " USING PigStorage (\n" +
+         " ' {\n" +
+         " \"debug\": $debug,\n" +
+         " \"schema\":\n" +
+         " { \"type\":\"record\",\"name\":\"$name\", \n" +
+         " \"fields\": [ {\"name\":\"browser_id\", \"type\":[\"null\",\"string\"]}, \n" +
+         " {\"name\":\"component_version\",\"type\":\"int\"},\n" +
+         " {\"name\":\"member_id\",\"type\":\"int\"},\n" +
+         " {\"name\":\"page_key\",\"type\":[\"null\",\"string\"]},\n" +
+         " {\"name\":\"session_id\",\"type\":\"long\"},\n" +
+         " {\"name\":\"tracking_time\",\"type\":\"long\"},\n" +
+         " {\"name\":\"type\",\"type\":[\"null\",\"string\"]}\n" +
+         " ]\n" +
+         " }\n" +
+         " }\n"+
+         " ');";
+
+     final String expectedString =
+         " avro = LOAD '/data/part-m-00000.avro' USING PigStorage ();\n" +
+         " avro2 = FOREACH avro GENERATE browser_id, component_version, " +
+         "member_id, page_key, session_id, tracking_time, type;\n" +
+
+         " fs -rmr testOut/out1;\n" +
+         " STORE avro2 INTO 'testOut/out2'\n" +
+         " USING PigStorage (\n" +
+         " ' {\n" +
+         " \"debug\": 5,\n" +
+         " \"schema\":\n" +
+         " { \"type\":\"record\",\"name\":\"TestRecord\", \n" +
+         " \"fields\": [ {\"name\":\"browser_id\", \"type\":[\"null\",\"string\"]}, \n" +
+         " {\"name\":\"component_version\",\"type\":\"int\"},\n" +
+         " {\"name\":\"member_id\",\"type\":\"int\"},\n" +
+         " {\"name\":\"page_key\",\"type\":[\"null\",\"string\"]},\n" +
+         " {\"name\":\"session_id\",\"type\":\"long\"},\n" +
+         " {\"name\":\"tracking_time\",\"type\":\"long\"},\n" +
+         " {\"name\":\"type\",\"type\":[\"null\",\"string\"]}\n" +
+         " ]\n" +
+         " }\n" +
+         " }\n"+
+         " ');";
+
+     // Declared outside the try block so they can be closed even when an assertion fails.
+     BufferedReader inExpected = null;
+     BufferedReader inResult = null;
+     try {
+         ParameterSubstitutionPreprocessor ps = new ParameterSubstitutionPreprocessor(50);
+         // The script bytes are UTF-8 encoded, so decode them as UTF-8 rather than
+         // with the platform default charset.
+         pigIStream = new BufferedReader(
+             new InputStreamReader(new ByteArrayInputStream(queryString.getBytes("UTF-8")), "UTF-8"));
+         pigOStream = new FileWriter(basedir + "/output26.pig");
+
+         String[] arg = {"debug = '5'", "name = 'TestRecord'"};
+         String[] argFiles = null;
+         ps.genSubstitutedFile(pigIStream , pigOStream , arg , argFiles);
+
+         FileInputStream pigResultStream = new FileInputStream(basedir + "/output26.pig");
+         InputStream expected = new ByteArrayInputStream(expectedString.getBytes("UTF-8"));
+         inExpected = new BufferedReader(new InputStreamReader(expected, "UTF-8"));
+         // The result file was written by a FileWriter (platform default charset),
+         // so it is read back with the platform default as well.
+         inResult = new BufferedReader(new InputStreamReader(pigResultStream));
+
+         String exLine;
+         String resLine;
+         int lineNum=0;
+
+         // Compare both texts line by line until either side runs out.
+         while (true) {
+             lineNum++;
+             exLine = inExpected.readLine();
+             resLine = inResult.readLine();
+             if (exLine==null || resLine==null)
+                 break;
+             assertEquals("Parameter substitution in function arguments failed. " + "Expected : "+exLine+" , but got : "+resLine+" in line num : "+lineNum ,exLine.trim(), resLine.trim());
+         }
+         // Both readers must be exhausted together; otherwise one text has extra lines.
+         if (!(exLine==null && resLine==null)) {
+             fail ("Parameter substitution in function arguments failed. " + "Expected : "+exLine+" , but got : "+resLine+" in line num : "+lineNum);
+         }
+     } catch (ParseException e) {
+         fail ("Got ParseException : " + e.getMessage());
+     } catch (RuntimeException e) {
+         fail ("Got RuntimeException : " + e.getMessage());
+     } catch (Error e) {
+         fail ("Got error : " + e.getMessage());
+     } finally {
+         if (inExpected != null) {
+             inExpected.close();
+         }
+         if (inResult != null) {
+             inResult.close();
+         }
+     }
+     log.info("Done");
+ }
+
}