You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by br...@apache.org on 2008/03/14 23:23:13 UTC
svn commit: r637293 - in /incubator/pig/trunk:
src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
test/org/apache/pig/test/TestPigScriptParser.java
Author: breed
Date: Fri Mar 14 15:23:05 2008
New Revision: 637293
URL: http://svn.apache.org/viewvc?rev=637293&view=rev
Log:
[#PIG-123] cannot escape single quotes in single quoted strings when using the eq or match operator
Added:
incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java
Modified:
incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
Modified: incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=637293&r1=637292&r2=637293&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri Mar 14 15:23:05 2008
@@ -113,7 +113,7 @@
}
static String unquote(String s) {
- return s.substring(1, s.length()-1);
+ return StringUtils.unescapeInputString(s.substring(1, s.length()-1)) ; // drop the first and last char, then decode backslash escapes; assumes s still carries its surrounding quotes - TODO confirm
}
static int undollar(String s) {
@@ -245,6 +245,91 @@
}
+
+class StringUtils { // escape-decoding helper; unquote() passes quoted-string contents through it
+ /**
+ * Decodes backslash escape sequences in {@code input} and returns the resulting text.
+ *
+ * <p>Recognized after a backslash: another backslash, a single quote, and the letters
+ * r, f, t, n, b (carriage return, form feed, tab, newline, backspace). A backslash
+ * followed by the letter u consumes the next four characters, parses them as a base-16
+ * integer and emits the single char with that value. Any other character after a
+ * backslash is emitted unchanged and the backslash itself is dropped.
+ *
+ * <p>NOTE(review): if the input ends right after a backslash, or with fewer than four
+ * characters after a backslash-u, the loop finishes with that state still pending and
+ * the incomplete sequence is silently left out of the result.
+ *
+ * @param input text to decode; may be null
+ * @return the decoded text, or an empty string when {@code input} is null
+ * @throws RuntimeException if the four characters after a backslash-u cannot be parsed
+ * as a base-16 integer (wraps the NumberFormatException)
+ */
+ public static String unescapeInputString(String input) {
+ // A null input is answered with an empty string instead of null.
+ if (input == null) {
+ return new String() ;
+ }
+
+ // Working state.
+ // Both builders are presized (4 hex digits / input length) so they don't have to grow.
+ int inputlength = input.length();
+ StringBuilder unicode = new StringBuilder(4);
+ StringBuilder output = new StringBuilder(inputlength) ;
+ boolean hadSlash = false; // true when the previous char was a backslash that has not been resolved yet
+ boolean inUnicode = false; // true while collecting the four digits of a unicode escape
+
+ // The main loop: a single pass over the input, one char at a time
+ for (int i = 0; i < inputlength; i++) {
+ char ch = input.charAt(i);
+ // Unicode mode: every char goes into the digit buffer, not into the output
+ if (inUnicode) {
+ unicode.append(ch);
+ if (unicode.length() == 4) {
+ // The buffer now holds four chars; parse them as base 16 and emit one char
+ try {
+ int value = Integer.parseInt(unicode.toString(), 0x10);
+ output.append((char) value) ;
+ // Empty the buffer so it can be reused by the next unicode escape
+ unicode.setLength(0);
+ inUnicode = false;
+ hadSlash = false; // already false here: it was cleared when the backslash-u was recognized
+ } catch (NumberFormatException nfe) {
+ throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe);
+ }
+ }
+ continue;
+ }
+ if (hadSlash) {
+ // The previous char was a backslash: translate this char as an escape code
+ hadSlash = false;
+ switch (ch) {
+ case '\\':
+ output.append('\\');
+ break;
+ case '\'':
+ output.append('\'');
+ break;
+ case 'r':
+ output.append('\r');
+ break;
+ case 'f':
+ output.append('\f');
+ break;
+ case 't':
+ output.append('\t');
+ break;
+ case 'n':
+ output.append('\n');
+ break;
+ case 'b':
+ output.append('\b');
+ break;
+ case 'u':
+ {
+ // Switch to unicode mode; the next four chars are collected as digits
+ inUnicode = true;
+ break;
+ }
+ default : // unrecognized escape: keep the char, drop the backslash
+ output.append(ch);
+ break;
+ }
+ continue;
+ } else if (ch == '\\') {
+ hadSlash = true; // remember the backslash; it is resolved on the next iteration
+ continue;
+ }
+ output.append(ch);
+ }
+ // Pending hadSlash / inUnicode state is not flushed before returning.
+ return output.toString() ;
+ }
+}
+
PARSER_END(QueryParser)
@@ -318,7 +403,20 @@
| < INTEGER: ( <DIGIT> )+ >
}
-TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> }
+TOKEN : { <QUOTEDSTRING : "'" // opening single quote
+ ( (~["'","\\","\n","\r"]) // any single char except quote, backslash, LF and CR
+ | ("\\" // or a backslash ...
+ ( ["n","t","b","r","f","\\","'"] ) // ... followed by one of these simple escape chars
+ )
+ | ("\\u" // or a backslash-u ...
+ ["0"-"9","A"-"F","a"-"f"] // ... followed by exactly four hex digits
+ ["0"-"9","A"-"F","a"-"f"]
+ ["0"-"9","A"-"F","a"-"f"]
+ ["0"-"9","A"-"F","a"-"f"]
+ )
+ )* // zero or more of the three alternatives above
+ "'"> } // closing single quote
+
TOKEN : { <EXECCOMMAND : "`" (~["`"])* "`"> }
// Pig has special variables starting with $
TOKEN : { <DOLLARVAR : "$" <INTEGER> > }
Modified: incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj?rev=637293&r1=637292&r2=637293&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj (original)
+++ incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj Fri Mar 14 15:23:05 2008
@@ -188,7 +188,8 @@
<IN_STRING> MORE :
{
- <"'"> { SwitchTo(prevState);}
+ <"\\'"> // backslash + quote is consumed without leaving IN_STRING. NOTE(review): there is no rule for an escaped backslash, so a string ending in backslash-backslash-quote appears to be read as an escaped quote and not terminated - confirm
+| <"'"> { SwitchTo(prevState);} // a quote not matched by the rule above switches back to prevState
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
@@ -248,7 +249,19 @@
TOKEN: {<IDENTIFIER: (<LETTER>)+(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>}
TOKEN: {<PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+>}
-TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> }
+TOKEN : { <QUOTEDSTRING : "'" // opening single quote
+ ( (~["'","\\","\n","\r"]) // any single char except quote, backslash, LF and CR
+ | ("\\" // or a backslash ...
+ ( ["n","t","b","r","f","\\","'"] ) // ... followed by one of these simple escape chars
+ )
+ | ("\\u" // or a backslash-u ...
+ ["0"-"9","A"-"F","a"-"f"] // ... followed by exactly four hex digits
+ ["0"-"9","A"-"F","a"-"f"]
+ ["0"-"9","A"-"F","a"-"f"]
+ ["0"-"9","A"-"F","a"-"f"]
+ )
+ )* // zero or more of the three alternatives above
+ "'"> } // closing single quote
void parse() throws IOException:
{
Token t1, t2;
Added: incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java?rev=637293&view=auto
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java (added)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java Fri Mar 14 15:23:05 2008
@@ -0,0 +1,111 @@
+package org.apache.pig.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.junit.Test;
+import junit.framework.TestCase;
+
+import org.apache.pig.PigServer;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.logicalLayer.* ;
+import org.apache.pig.impl.logicalLayer.parser.* ;
+import org.apache.pig.impl.eval.* ;
+import org.apache.pig.impl.eval.cond.* ;
+
+public class TestPigScriptParser extends TestCase { // parser tests for escape sequences in single-quoted string constants
+ /**
+ * Parses a LOAD statement followed by four FILTER statements whose quoted constants
+ * contain, in turn, no escapes, an escaped single quote, an escaped newline and two
+ * unicode escapes, and checks that each parsed constant equals the unescaped text.
+ *
+ * NOTE(review): the class extends the JUnit 3 TestCase and also uses the JUnit 4
+ * Test annotation; confirm which runner is intended.
+ */
+ @Test
+ public void testParserWithEscapeCharacters() throws Exception {
+
+ // State shared by all parser runs below: each run adds its plan to aliases
+ Map<String, LogicalPlan> aliases = new HashMap<String, LogicalPlan>() ;
+ Map<OperatorKey, LogicalOperator> opTable = new HashMap<OperatorKey, LogicalOperator>() ;
+ PigContext pigContext = new PigContext(PigServer.ExecType.LOCAL) ;
+
+ String tempFile = this.prepareTempFile() ;
+
+ // Start the real parsing job
+ {
+ // Initial statement: defines alias A, which the FILTER statements refer to
+ String query = String.format("A = LOAD '%s' ;", tempFile) ;
+ ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); // NOTE(review): getBytes() uses the platform default charset
+ QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable) ;
+ LogicalPlan lp = parser.Parse() ;
+ aliases.put(lp.getAlias(), lp) ;
+ }
+
+ {
+ // Normal condition: constant without any escape sequence
+ String query = "B1 = filter A by $0 eq 'This is a test string' ;" ;
+ checkParsedConstContent(aliases, opTable, pigContext,
+ query, "This is a test string") ;
+ }
+
+ {
+ // Escaped single quote: the Java literal \\' reaches the parser as backslash + quote
+ String query = "B2 = filter A by $0 eq 'This is a test \\'string' ;" ;
+ checkParsedConstContent(aliases, opTable, pigContext,
+ query, "This is a test 'string") ;
+ }
+
+ {
+ // Escaped newline: the parser sees backslash + n, the expected value has a real newline
+ String query = "B3 = filter A by $0 eq 'This is a test \\nstring' ;" ;
+ checkParsedConstContent(aliases, opTable, pigContext,
+ query, "This is a test \nstring") ;
+ }
+
+ {
+ // Unicode: the query carries two six-character escapes, the expected value the two decoded chars
+ String query = "B4 = filter A by $0 eq 'This is a test \\uD30C\\uC774string' ;" ;
+ checkParsedConstContent(aliases, opTable, pigContext,
+ query, "This is a test \uD30C\uC774string") ;
+ }
+ }
+ /**
+ * Parses {@code query}, stores the resulting plan in {@code aliases} under its alias,
+ * and asserts that the constant operand of the comparison equals
+ * {@code expectedContent}.
+ *
+ * @param aliases map handed to the parser; the newly parsed plan is added to it
+ * @param opTable map handed to the parser and used here to look up the plan root
+ * @param pigContext context handed to the parser
+ * @param query the statement to parse
+ * @param expectedContent value the parsed constant is compared against
+ * @throws Exception anything thrown while parsing or while walking the parsed plan
+ */
+ private void checkParsedConstContent(Map<String, LogicalPlan> aliases,
+ Map<OperatorKey, LogicalOperator> opTable,
+ PigContext pigContext,
+ String query,
+ String expectedContent)
+ throws Exception {
+ // Run the parser
+ ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); // NOTE(review): getBytes() uses the platform default charset
+ QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable) ;
+ LogicalPlan lp = parser.Parse() ;
+ aliases.put(lp.getAlias(), lp) ;
+ // NOTE(review): the casts below assume the root is an LOEval holding a FilterSpec with a CompCond; they are not checked
+ // Digging down the tree
+ LOEval eval = (LOEval)opTable.get(lp.getRoot()) ;
+ CompCond compCond = ((CompCond)(((FilterSpec) eval.getSpec()).cond)) ;
+
+ // Here is the actual check logic: the constant may sit on either side of the comparison
+ if (compCond.left instanceof ConstSpec) {
+ ConstSpec constSpec = (ConstSpec) compCond.left ;
+ assertTrue("Must be equal",
+ constSpec.constant.equals(expectedContent)) ;
+ }
+ // If not left, it must be right (cast without an instanceof check).
+ else {
+ ConstSpec constSpec = (ConstSpec) compCond.right ;
+ assertTrue("Must be equal",
+ constSpec.constant.equals(expectedContent)) ;
+ }
+ }
+ /**
+ * Creates a temporary file containing the single line "hohoho", registers it for
+ * deletion when the JVM exits, and returns its path.
+ *
+ * @return path of the created file
+ * @throws IOException if the file cannot be created or opened for writing
+ */
+ private String prepareTempFile() throws IOException {
+ File inputFile = File.createTempFile("test", "txt"); // NOTE(review): the suffix has no leading dot, so the name ends in "txt" rather than ".txt"
+ inputFile.deleteOnExit() ;
+ PrintStream ps = new PrintStream(new FileOutputStream(inputFile));
+ ps.println("hohoho") ;
+ ps.close();
+ return inputFile.getPath() ;
+ }
+
+}