You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by br...@apache.org on 2008/03/14 23:23:13 UTC

svn commit: r637293 - in /incubator/pig/trunk: src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj test/org/apache/pig/test/TestPigScriptParser.java

Author: breed
Date: Fri Mar 14 15:23:05 2008
New Revision: 637293

URL: http://svn.apache.org/viewvc?rev=637293&view=rev
Log:
[#PIG-123] cannot escape single quotes in single quoted strings when using the eq or match operator


Added:
    incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java
Modified:
    incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj

Modified: incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=637293&r1=637292&r2=637293&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Fri Mar 14 15:23:05 2008
@@ -113,7 +113,7 @@
     }
 
     static String unquote(String s) {
-		return s.substring(1, s.length()-1);
+		return StringUtils.unescapeInputString(s.substring(1, s.length()-1)) ;
 	}
 	
 	static int undollar(String s) {
@@ -245,6 +245,91 @@
 
 }
 
+
+class StringUtils {
+    /** Decodes backslash escapes in a quoted string's body; a null input yields an empty string. */
+       public static String unescapeInputString(String input)  {
+
+            if (input == null) {
+                return new String() ;
+            }
+            
+            // Scan state for the loop below.
+            // Both builders are pre-sized (four hex digits; at most the input length) so they never grow.
+            int inputlength = input.length();       
+            StringBuilder unicode = new StringBuilder(4);
+            StringBuilder output = new StringBuilder(inputlength) ;
+            boolean hadSlash = false;
+            boolean inUnicode = false;
+            
+            // Single left-to-right pass; hadSlash and inUnicode carry context from earlier chars.
+            for (int i = 0; i < inputlength; i++) {
+                char ch = input.charAt(i);
+                // Collecting the hex digits of a unicode escape: buffer chars until there are four.
+                if (inUnicode) {
+                    unicode.append(ch);
+                    if (unicode.length() == 4) {
+                        // Four digits buffered: parse them as base 16 and emit the resulting char.
+                        try {
+                            int value = Integer.parseInt(unicode.toString(), 0x10);
+                            output.append((char) value) ;
+                            // Empty the digit buffer so the next unicode escape can reuse it.
+                            unicode.setLength(0);
+                            inUnicode = false;
+                            hadSlash = false;
+                        } catch (NumberFormatException nfe) {
+                            throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe);
+                        }
+                    }
+                    continue;
+                }
+                if (hadSlash) {
+                    // The previous char was a backslash, so ch selects the escape to emit.
+                    hadSlash = false;
+                    switch (ch) {
+                        case '\\':
+                            output.append('\\');
+                            break;
+                        case '\'':
+                            output.append('\'');
+                            break;
+                        case 'r':
+                            output.append('\r');
+                            break;
+                        case 'f':
+                            output.append('\f');
+                            break;
+                        case 't':
+                            output.append('\t');
+                            break;
+                        case 'n':
+                            output.append('\n');
+                            break;
+                        case 'b':
+                            output.append('\b');
+                            break;
+                        case 'u':
+                            {
+                                // Enter unicode mode; the following chars are buffered as hex digits.
+                                inUnicode = true;
+                                break;
+                            }
+                        default :   // unknown escape: emit the char itself, without the backslash
+                            output.append(ch);
+                            break;
+                    }
+                    continue;
+                } else if (ch == '\\') {
+                    hadSlash = true;   // emit nothing yet; the next char decides what is written
+                    continue;
+                }
+                output.append(ch);
+            }
+            // NOTE(review): input ending in a lone backslash or a partial unicode escape loses those chars silently.
+            return output.toString() ;
+        }
+}
+
 	
 PARSER_END(QueryParser)
 
@@ -318,7 +403,20 @@
 | 	< INTEGER: ( <DIGIT> )+ >
 }
 
-TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> }
+TOKEN : { <QUOTEDSTRING :  "'"
+      (   (~["'","\\","\n","\r"])   // any char other than quote, backslash, LF or CR
+        | ("\\"                     // or a backslash followed by one of the listed escape codes
+            ( ["n","t","b","r","f","\\","'"] )
+          )
+        | ("\\u"                    // or backslash-u followed by exactly four hex digits
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+	         ["0"-"9","A"-"F","a"-"f"]
+          )
+      )*
+      "'"> }
+     
 TOKEN : { <EXECCOMMAND : "`" (~["`"])* "`"> }
 // Pig has special variables starting with $
 TOKEN : { <DOLLARVAR : "$" <INTEGER> > }

Modified: incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj?rev=637293&r1=637292&r2=637293&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj (original)
+++ incubator/pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj Fri Mar 14 15:23:05 2008
@@ -188,7 +188,8 @@
 
 <IN_STRING> MORE :
 {
-	<"'"> { SwitchTo(prevState);}
+	<"\\'">
+|	<"'"> { SwitchTo(prevState);}
 |	<("\n" | "\r" | "\r\n")> {secondary_prompt();}
 |	<(~[])>
 }
@@ -248,7 +249,19 @@
 
 TOKEN: {<IDENTIFIER: (<LETTER>)+(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>}
 TOKEN: {<PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+>}
-TOKEN : { <QUOTEDSTRING : "'" (~["'"])* "'"> }
+TOKEN : { <QUOTEDSTRING :  "'"
+      (   (~["'","\\","\n","\r"])   // any char other than quote, backslash, LF or CR
+        | ("\\"                     // or a backslash followed by one of the listed escape codes
+            ( ["n","t","b","r","f","\\","'"] )
+          )
+        | ("\\u"                    // or backslash-u followed by exactly four hex digits
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+            ["0"-"9","A"-"F","a"-"f"]
+          )
+      )*
+      "'"> }
 void parse() throws IOException:
 {
 	Token t1, t2;

Added: incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java?rev=637293&view=auto
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java (added)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestPigScriptParser.java Fri Mar 14 15:23:05 2008
@@ -0,0 +1,111 @@
+package org.apache.pig.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.junit.Test;
+import junit.framework.TestCase;
+
+import org.apache.pig.PigServer;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.logicalLayer.* ;
+import org.apache.pig.impl.logicalLayer.parser.* ;
+import org.apache.pig.impl.eval.* ;
+import org.apache.pig.impl.eval.cond.* ;
+
+public class TestPigScriptParser extends TestCase {
+    /** Parses filter statements whose quoted constants contain escapes and checks the parsed constant. */
+    @Test
+    public void testParserWithEscapeCharacters() throws Exception {
+
+        // Parser state shared by every statement parsed in this test
+        Map<String, LogicalPlan> aliases = new HashMap<String, LogicalPlan>() ;
+        Map<OperatorKey, LogicalOperator> opTable = new HashMap<OperatorKey, LogicalOperator>() ;
+        PigContext pigContext = new PigContext(PigServer.ExecType.LOCAL) ;
+        
+        String tempFile = this.prepareTempFile() ;
+        
+        // Parse the LOAD statement first and record its plan; the filters below all reference A
+        {
+        	// NOTE(review): tempFile is spliced in unescaped; a path with backslashes would presumably be mis-tokenised - confirm
+        	String query = String.format("A = LOAD '%s' ;", tempFile) ;
+        	ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); 
+        	QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable) ;
+        	LogicalPlan lp = parser.Parse() ; 
+        	aliases.put(lp.getAlias(), lp) ;
+        }
+        
+        {
+        	// Baseline: a constant with no escape sequences at all
+        	String query = "B1 = filter A by $0 eq 'This is a test string' ;" ;
+        	checkParsedConstContent(aliases, opTable, pigContext,
+        	                        query, "This is a test string") ;	
+        }
+        
+        {
+        	// Escaped single quote inside the constant
+        	String query = "B2 = filter A by $0 eq 'This is a test \\'string' ;" ;
+        	checkParsedConstContent(aliases, opTable, pigContext,
+        	                        query, "This is a test 'string") ;	
+        }
+        
+        {
+        	// Backslash-n in the query text is expected to become a real newline
+        	String query = "B3 = filter A by $0 eq 'This is a test \\nstring' ;" ;
+        	checkParsedConstContent(aliases, opTable, pigContext,
+        	                        query, "This is a test \nstring") ;	
+        }
+        
+        {
+        	// Two four-hex-digit unicode escapes are expected to become the two chars they name
+        	String query = "B4 = filter A by $0 eq 'This is a test \\uD30C\\uC774string' ;" ;
+        	checkParsedConstContent(aliases, opTable, pigContext,
+        	                        query, "This is a test \uD30C\uC774string") ;	
+        }
+    }
+    /** Parses query, records its plan under its alias, and asserts the filter's constant operand equals expectedContent. */
+	private void checkParsedConstContent(Map<String, LogicalPlan> aliases,
+                                         Map<OperatorKey, LogicalOperator> opTable,
+                                         PigContext pigContext,
+                                         String query,
+                                         String expectedContent) 
+                                        throws Exception {
+        // Parse the statement and record the resulting plan under its alias
+        ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); 
+        QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable) ;
+        LogicalPlan lp = parser.Parse() ; 
+        aliases.put(lp.getAlias(), lp) ;
+        
+        // Walk from the plan's root operator to the filter's comparison condition
+        LOEval eval = (LOEval)opTable.get(lp.getRoot()) ;
+        CompCond compCond = ((CompCond)(((FilterSpec) eval.getSpec()).cond)) ;
+        
+        // The constant may be on either side of the comparison; check whichever side holds it
+        if (compCond.left instanceof ConstSpec) {
+            ConstSpec constSpec = (ConstSpec) compCond.left ;
+            assertTrue("Must be equal", 
+                        constSpec.constant.equals(expectedContent)) ;
+        } 
+        // Left is not a ConstSpec, so the right side is cast to one (the cast fails otherwise)
+        else {
+            ConstSpec constSpec = (ConstSpec) compCond.right ;
+            assertTrue("Must be equal", 
+                        constSpec.constant.equals(expectedContent)) ;
+        }
+    }
+    /** Creates a temp file (deleted on JVM exit) holding the single line "hohoho" and returns its path. */
+    private String prepareTempFile() throws IOException {
+        File inputFile = File.createTempFile("test", "txt");   // NOTE(review): suffix has no leading dot, so the name ends in "txt", not ".txt"
+        inputFile.deleteOnExit() ;
+        PrintStream ps = new PrintStream(new FileOutputStream(inputFile));
+        ps.println("hohoho") ;
+        ps.close();
+        return inputFile.getPath() ;
+    }
+
+}