You are viewing a plain text version of this content. The canonical link is not included in this plain-text copy.
Posted to commits@pig.apache.org by da...@apache.org on 2009/08/07 04:27:16 UTC

svn commit: r801865 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/builtin/TOKENIZE.java test/org/apache/pig/test/TestBuiltin.java test/org/apache/pig/test/TestPigContext.java

Author: daijy
Date: Fri Aug  7 02:27:15 2009
New Revision: 801865

URL: http://svn.apache.org/viewvc?rev=801865&view=rev
Log:
PIG-905: TOKENIZE throws exception on null data

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Aug  7 02:27:15 2009
@@ -46,6 +46,8 @@
 
 BUG FIXES
     
+    PIG-905: TOKENIZE throws exception on null data (daijy)
+
     PIG-901: InputSplit (SliceWrapper) created by Pig is big in size due to
     serialized PigContext (pradeepkth)
 

Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Fri Aug  7 02:27:15 2009
@@ -18,6 +18,8 @@
 package org.apache.pig.builtin;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.StringTokenizer;
 
 import org.apache.pig.EvalFunc;
@@ -30,7 +32,7 @@
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
-
+import org.apache.pig.FuncSpec;
 
 public class TOKENIZE extends EvalFunc<DataBag> {
     TupleFactory mTupleFactory = TupleFactory.getInstance();
@@ -39,8 +41,14 @@
     @Override
     public DataBag exec(Tuple input) throws IOException {
         try {
-            DataBag output = mBagFactory.newDefaultBag();
+            if (input==null)
+                return null;
+            if (input.size()==0)
+                return null;
             Object o = input.get(0);
+            if (o==null)
+                return null;
+            DataBag output = mBagFactory.newDefaultBag();
             if (!(o instanceof String)) {
             	int errCode = 2114;
             	String msg = "Expected input to be chararray, but" +
@@ -86,5 +94,11 @@
         }   
     }
 
-    
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+        Schema s = new Schema();
+        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+        funcList.add(new FuncSpec(this.getClass().getName(), s));
+        return funcList;
+    }
 };

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java Fri Aug  7 02:27:15 2009
@@ -1355,6 +1355,33 @@
         
         assertTrue(f1.equals(f2));        
     }
+    
+    @Test
+    public void testTOKENIZE() throws Exception {
+        TupleFactory tf = TupleFactory.getInstance();
+        Tuple t1 = tf.newTuple(1);
+        t1.set(0, "123 456\"789");
+        Tuple t2 = tf.newTuple(1);
+        t2.set(0, null);
+        Tuple t3 = tf.newTuple(0);
+        
+        TOKENIZE f = new TOKENIZE();
+        DataBag b = f.exec(t1);
+        assertTrue(b.size()==3);
+        Iterator<Tuple> i = b.iterator();
+        Tuple rt = i.next();
+        assertTrue(rt.get(0).equals("123"));
+        rt = i.next();
+        assertTrue(rt.get(0).equals("456"));
+        rt = i.next();
+        assertTrue(rt.get(0).equals("789"));
+        
+        b = f.exec(t2);
+        assertTrue(b==null);
+        
+        b = f.exec(t3);
+        assertTrue(b==null);
+    }
 
     @Test
     public void testDIFF() throws Exception {

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java?rev=801865&r1=801864&r2=801865&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java Fri Aug  7 02:27:15 2009
@@ -213,7 +213,7 @@
     private List<String> getCommands() {
         List<String> commands = new ArrayList<String>();
         commands.add("my_input = LOAD '" + Util.encodeEscape(input.getAbsolutePath()) + "' USING PigStorage();");
-        commands.add("words = FOREACH my_input GENERATE FLATTEN(TOKENIZE(*));");
+        commands.add("words = FOREACH my_input GENERATE FLATTEN(TOKENIZE($0));");
         commands.add("grouped = GROUP words BY $0;");
         commands.add("counts = FOREACH grouped GENERATE group, COUNT(words);");
         return commands;