You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by sm...@apache.org on 2009/03/03 01:22:25 UTC

svn commit: r749487 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/builtin/TOKENIZE.java test/org/apache/pig/test/TestLogicalPlanBuilder.java

Author: sms
Date: Tue Mar  3 00:22:24 2009
New Revision: 749487

URL: http://svn.apache.org/viewvc?rev=749487&view=rev
Log:
PIG-684: outputSchema method in TOKENIZE is broken (thejas via sms)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=749487&r1=749486&r2=749487&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue Mar  3 00:22:24 2009
@@ -434,3 +434,5 @@
     PIG-591: Error handling phase four (sms via pradeepkth)
 
     PIG-664: Semantics of * is not consistent (sms)
+
+    PIG-684: outputSchema method in TOKENIZE is broken (thejas via sms)

Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=749487&r1=749486&r2=749487&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Tue Mar  3 00:22:24 2009
@@ -28,6 +28,7 @@
 import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 
 
@@ -58,8 +59,32 @@
 
     @Override
     public Schema outputSchema(Schema input) {
-        Schema schema = new Schema(new Schema.FieldSchema("token",
-            DataType.CHARARRAY));
-        return schema;
+        
+        try {
+            Schema.FieldSchema tokenFs = new Schema.FieldSchema("token", 
+                    DataType.CHARARRAY); 
+            Schema tupleSchema = new Schema(tokenFs);
+
+            Schema.FieldSchema tupleFs;
+            tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema,
+                    DataType.TUPLE);
+
+            Schema bagSchema = new Schema(tupleFs);
+            bagSchema.setTwoLevelAccessRequired(true);
+            Schema.FieldSchema bagFs = new Schema.FieldSchema(
+                        "bag_of_tokenTuples",bagSchema, DataType.BAG);
+            
+            return new Schema(bagFs); 
+            
+            
+            
+        } catch (FrontendException e) {
+            // Throwing a RuntimeException because the schema construction
+            // above is not expected to throw an exception, and also because
+            // the superclass method does not throw an exception.
+            throw new RuntimeException("Unable to compute TOKENIZE schema.");
+        }   
     }
-}
+
+    
+};

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java?rev=749487&r1=749486&r2=749487&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Tue Mar  3 00:22:24 2009
@@ -1879,6 +1879,31 @@
         buildPlan(query);
     }
 
+
+    @Test
+    public void testTokenizeSchema()  throws FrontendException, ParseException {
+        LogicalPlan lp;
+        LOForEach foreach;
+
+        buildPlan("a = load 'one' as (f1: chararray);");
+        lp = buildPlan("b = foreach a generate TOKENIZE(f1);");
+        foreach = (LOForEach) lp.getLeaves().get(0);
+
+        Schema.FieldSchema tokenFs = new Schema.FieldSchema("token", 
+                DataType.CHARARRAY); 
+        Schema tupleSchema = new Schema(tokenFs);
+
+        Schema.FieldSchema tupleFs;
+        tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema,
+                DataType.TUPLE);
+
+        Schema bagSchema = new Schema(tupleFs);
+        Schema.FieldSchema bagFs = new Schema.FieldSchema(
+                    "bag_of_tokenTuples",bagSchema, DataType.BAG);
+        
+        assertTrue(Schema.equals(foreach.getSchema(), new Schema(bagFs), false, true));
+    }
+    
     private void printPlan(LogicalPlan lp) {
         LOPrinter graphPrinter = new LOPrinter(System.err, lp);
         System.err.println("Printing the logical plan");