You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2011/08/26 22:27:19 UTC
svn commit: r1162237 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/builtin/TOKENIZE.java
test/org/apache/pig/test/TestBuiltin.java
Author: hashutosh
Date: Fri Aug 26 20:27:19 2011
New Revision: 1162237
URL: http://svn.apache.org/viewvc?rev=1162237&view=rev
Log:
PIG-2242: Allow the delimiter to be specified when calling TOKENIZE
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
pig/trunk/test/org/apache/pig/test/TestBuiltin.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1162237&r1=1162236&r2=1162237&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Aug 26 20:27:19 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2242: Allow the delimiter to be specified when calling TOKENIZE (markroddy via hashutosh)
+
PIG-2240: Allow any compression codec to be specified in AvroStorage (tomwhite via dvryaboy)
PIG-2229: Pig end-to-end tests should test local mode as well as mr mode (gates)
Modified: pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=1162237&r1=1162236&r2=1162237&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Fri Aug 26 20:27:19 2011
@@ -61,7 +61,20 @@ public class TOKENIZE extends EvalFunc<D
" got " + o.getClass().getName();
throw new ExecException(msg, errCode, PigException.BUG);
}
- StringTokenizer tok = new StringTokenizer((String)o, " \",()*", false);
+
+ String delim = " \",()*";
+ if (input.size()==2) {
+ Object d = input.get(1);
+ if (!(d instanceof String)) {
+ int errCode = 2114;
+ String msg = "Expected delim to be chararray, but" +
+ " got " + d.getClass().getName();
+ throw new ExecException(msg, errCode, PigException.BUG);
+ }
+ delim = (String)d;
+ }
+
+ StringTokenizer tok = new StringTokenizer((String)o, delim, false);
while (tok.hasMoreTokens()) {
output.add(mTupleFactory.newTuple(tok.nextToken()));
}
@@ -106,6 +119,10 @@ public class TOKENIZE extends EvalFunc<D
Schema s = new Schema();
s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
funcList.add(new FuncSpec(this.getClass().getName(), s));
+ s = new Schema();
+ s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ funcList.add(new FuncSpec(this.getClass().getName(), s));
return funcList;
}
};
Modified: pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=1162237&r1=1162236&r2=1162237&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestBuiltin.java Fri Aug 26 20:27:19 2011
@@ -2309,6 +2309,20 @@ public class TestBuiltin {
rt = i.next();
assertTrue(rt.get(0).equals("789"));
+ // Check when delim specified
+ Tuple t4 = tf.newTuple(2);
+ t4.set(0, "123|456|78\"9");
+ t4.set(1, "|");
+ b = f.exec(t4);
+ assertTrue(b.size()==3);
+ i = b.iterator();
+ rt = i.next();
+ assertTrue(rt.get(0).equals("123"));
+ rt = i.next();
+ assertTrue(rt.get(0).equals("456"));
+ rt = i.next();
+ assertTrue(rt.get(0).equals("78\"9"));
+
b = f.exec(t2);
assertTrue(b==null);