You are viewing a plain text version of this content. The canonical link for it is here [hyperlink target not preserved in this plain-text copy].
Posted to commits@pig.apache.org by da...@apache.org on 2009/08/07 04:27:16 UTC
svn commit: r801865 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/builtin/TOKENIZE.java
test/org/apache/pig/test/TestBuiltin.java
test/org/apache/pig/test/TestPigContext.java
Author: daijy
Date: Fri Aug 7 02:27:15 2009
New Revision: 801865
URL: http://svn.apache.org/viewvc?rev=801865&view=rev
Log:
PIG-905: TOKENIZE throws exception on null data
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Aug 7 02:27:15 2009
@@ -46,6 +46,8 @@
BUG FIXES
+ PIG-905: TOKENIZE throws exception on null data (daijy)
+
PIG-901: InputSplit (SliceWrapper) created by Pig is big in size due to
serialized PigContext (pradeepkth)
Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Fri Aug 7 02:27:15 2009
@@ -18,6 +18,8 @@
package org.apache.pig.builtin;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.StringTokenizer;
import org.apache.pig.EvalFunc;
@@ -30,7 +32,7 @@
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-
+import org.apache.pig.FuncSpec;
public class TOKENIZE extends EvalFunc<DataBag> {
TupleFactory mTupleFactory = TupleFactory.getInstance();
@@ -39,8 +41,14 @@
@Override
public DataBag exec(Tuple input) throws IOException {
try {
- DataBag output = mBagFactory.newDefaultBag();
+ if (input==null)
+ return null;
+ if (input.size()==0)
+ return null;
Object o = input.get(0);
+ if (o==null)
+ return null;
+ DataBag output = mBagFactory.newDefaultBag();
if (!(o instanceof String)) {
int errCode = 2114;
String msg = "Expected input to be chararray, but" +
@@ -86,5 +94,11 @@
}
}
-
+ public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+ List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+ Schema s = new Schema();
+ s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ funcList.add(new FuncSpec(this.getClass().getName(), s));
+ return funcList;
+ }
};
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java Fri Aug 7 02:27:15 2009
@@ -1355,6 +1355,33 @@
assertTrue(f1.equals(f2));
}
+
+ @Test
+ public void testTOKENIZE() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+ Tuple t1 = tf.newTuple(1);
+ t1.set(0, "123 456\"789");
+ Tuple t2 = tf.newTuple(1);
+ t2.set(0, null);
+ Tuple t3 = tf.newTuple(0);
+
+ TOKENIZE f = new TOKENIZE();
+ DataBag b = f.exec(t1);
+ assertTrue(b.size()==3);
+ Iterator<Tuple> i = b.iterator();
+ Tuple rt = i.next();
+ assertTrue(rt.get(0).equals("123"));
+ rt = i.next();
+ assertTrue(rt.get(0).equals("456"));
+ rt = i.next();
+ assertTrue(rt.get(0).equals("789"));
+
+ b = f.exec(t2);
+ assertTrue(b==null);
+
+ b = f.exec(t3);
+ assertTrue(b==null);
+ }
@Test
public void testDIFF() throws Exception {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java Fri Aug 7 02:27:15 2009
@@ -213,7 +213,7 @@
private List<String> getCommands() {
List<String> commands = new ArrayList<String>();
commands.add("my_input = LOAD '" + Util.encodeEscape(input.getAbsolutePath()) + "' USING PigStorage();");
- commands.add("words = FOREACH my_input GENERATE FLATTEN(TOKENIZE(*));");
+ commands.add("words = FOREACH my_input GENERATE FLATTEN(TOKENIZE($0));");
commands.add("grouped = GROUP words BY $0;");
commands.add("counts = FOREACH grouped GENERATE group, COUNT(words);");
return commands;