You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2011/08/26 22:27:19 UTC

svn commit: r1162237 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/TOKENIZE.java test/org/apache/pig/test/TestBuiltin.java

Author: hashutosh
Date: Fri Aug 26 20:27:19 2011
New Revision: 1162237

URL: http://svn.apache.org/viewvc?rev=1162237&view=rev
Log:
PIG-2242: Allow the delimiter to be specified when calling TOKENIZE

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
    pig/trunk/test/org/apache/pig/test/TestBuiltin.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1162237&r1=1162236&r2=1162237&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Aug 26 20:27:19 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-2242: Allow the delimiter to be specified when calling TOKENIZE (markroddy via hashutosh)
+
 PIG-2240: Allow any compression codec to be specified in AvroStorage (tomwhite via dvryaboy)
 
 PIG-2229: Pig end-to-end tests should test local mode as well as mr mode (gates)

Modified: pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=1162237&r1=1162236&r2=1162237&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Fri Aug 26 20:27:19 2011
@@ -61,7 +61,20 @@ public class TOKENIZE extends EvalFunc<D
                 " got " + o.getClass().getName();
                 throw new ExecException(msg, errCode, PigException.BUG);
             }
-            StringTokenizer tok = new StringTokenizer((String)o, " \",()*", false);
+            
+            String delim = " \",()*";
+            if (input.size()==2) {
+                Object d = input.get(1);
+                if (!(d instanceof String)) {
+                    int errCode = 2114;
+                    String msg = "Expected delim to be chararray, but" +
+                        " got " + d.getClass().getName();
+                    throw new ExecException(msg, errCode, PigException.BUG);
+                }
+                delim = (String)d;
+            }
+
+            StringTokenizer tok = new StringTokenizer((String)o, delim, false);
             while (tok.hasMoreTokens()) {
                 output.add(mTupleFactory.newTuple(tok.nextToken()));
             }
@@ -106,6 +119,10 @@ public class TOKENIZE extends EvalFunc<D
         Schema s = new Schema();
         s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
         funcList.add(new FuncSpec(this.getClass().getName(), s));
+        s = new Schema();
+        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+        funcList.add(new FuncSpec(this.getClass().getName(), s));
         return funcList;
     }
 };

Modified: pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=1162237&r1=1162236&r2=1162237&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestBuiltin.java Fri Aug 26 20:27:19 2011
@@ -2309,6 +2309,20 @@ public class TestBuiltin {
         rt = i.next();
         assertTrue(rt.get(0).equals("789"));
         
+        // Check when delim specified
+        Tuple t4 = tf.newTuple(2);
+        t4.set(0, "123|456|78\"9");
+        t4.set(1, "|");        
+        b = f.exec(t4);
+        assertTrue(b.size()==3);
+        i = b.iterator();
+        rt = i.next();
+        assertTrue(rt.get(0).equals("123"));
+        rt = i.next();
+        assertTrue(rt.get(0).equals("456"));
+        rt = i.next();
+        assertTrue(rt.get(0).equals("78\"9"));
+
         b = f.exec(t2);
         assertTrue(b==null);