You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by dv...@apache.org on 2011/11/14 08:27:34 UTC

svn commit: r1201618 - in /pig/trunk: CHANGES.txt src/org/apache/pig/EvalFunc.java src/org/apache/pig/builtin/OutputSchema.java test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java

Author: dvryaboy
Date: Mon Nov 14 07:27:34 2011
New Revision: 1201618

URL: http://svn.apache.org/viewvc?rev=1201618&view=rev
Log:
PIG-2151: Add annotation to specify output schema in Java UDFs (dvryaboy)

Added:
    pig/trunk/src/org/apache/pig/builtin/OutputSchema.java
    pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/EvalFunc.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1201618&r1=1201617&r2=1201618&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Nov 14 07:27:34 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-2151: Add annotation to specify output schema in Java UDFs (dvryaboy)
+
 PIG-2230: Improved error message for invalid parameter format (xuitingz via olgan)
 
 PIG-2328: Add builtin UDFs for building and using bloom filters (gates)

Modified: pig/trunk/src/org/apache/pig/EvalFunc.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/EvalFunc.java?rev=1201618&r1=1201617&r2=1201618&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/EvalFunc.java (original)
+++ pig/trunk/src/org/apache/pig/EvalFunc.java Mon Nov 14 07:27:34 2011
@@ -22,12 +22,13 @@ import java.io.IOException;
 import java.lang.reflect.ParameterizedType;
 import java.lang.reflect.Type;
 import java.util.List;
-import java.util.Properties;
 import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
+import org.apache.pig.builtin.OutputSchema;
 import org.apache.pig.classification.InterfaceAudience;
 import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.data.Tuple;
@@ -35,9 +36,8 @@ import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.UDFContext;
-import org.apache.pig.LoadPushDown.RequiredFieldList;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
+import org.apache.pig.impl.util.Utils;
+import org.apache.pig.parser.ParserException;
 
 
 /**
@@ -215,11 +215,20 @@ public abstract class EvalFunc<T>  {
      * Report the schema of the output of this UDF.  Pig will make use of
      * this in error checking, optimization, and planning.  The schema
      * of input data to this UDF is provided.
+     * <p>
+     * The default implementation interprets the {@link OutputSchema} annotation,
+     * if one is present. Otherwise, it returns <code>null</code> (no known output schema).
+     *
      * @param input Schema of the input
      * @return Schema of the output
      */
     public Schema outputSchema(Schema input) {
-        return null;
+        OutputSchema schema = this.getClass().getAnnotation(OutputSchema.class);
+        try {
+            return (schema == null) ? null : Utils.getSchemaFromString(schema.value());
+        } catch (ParserException e) {
+            throw new RuntimeException(e);
+        }
     }
     
     /**

Added: pig/trunk/src/org/apache/pig/builtin/OutputSchema.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/OutputSchema.java?rev=1201618&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/OutputSchema.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/OutputSchema.java Mon Nov 14 07:27:34 2011
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
+
+/**
+ * An EvalFunc can be annotated with an <code>OutputSchema</code> to
+ * tell Pig what the expected output is. This can be used in place
+ * of {@link EvalFunc#outputSchema(Schema)}.
+ * <p>
+ * The default implementation of {@link EvalFunc#outputSchema(Schema)}
+ * will look at this annotation and return an interpreted schema, if the annotation is present.
+ * <p>
+ * Implementing a custom {@link EvalFunc#outputSchema(Schema)} will
+ * override the annotation (unless you deal with it explicitly, or by calling <code>super.outputSchema(schema)</code>).
+ * <p>
+ * Here's an example of a complex schema declared in an annotation:
+ * <code>@OutputSchema("y:bag{t:tuple(len:int,word:chararray)}")</code>
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+@Documented
+@Retention(value=RetentionPolicy.RUNTIME)
+public @interface OutputSchema {
+    String value();
+}

Added: pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java?rev=1201618&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java (added)
+++ pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java Mon Nov 14 07:27:34 2011
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.test;
+
+import static org.junit.Assert.*;
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.builtin.OutputSchema;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class TestEvalFuncOutputAnnotation {
+
+    @OutputSchema("foo:chararray")
+    public static class AnnotatedFunc extends EvalFunc<String> {
+        @Override
+        public String exec(Tuple input) throws IOException {
+            return null;
+        }
+    }
+
+    @OutputSchema("foo:chararray")
+    public static class OverriddenFunc extends EvalFunc<String> {
+        @Override
+        public String exec(Tuple input) throws IOException {
+            return null;
+        }
+        @Override
+        public Schema outputSchema(Schema input) {
+            return new Schema(new FieldSchema("bar", DataType.CHARARRAY));
+        }
+    }
+
+    // This would give the same result: "y:bag{tuple(len:int,word:chararray)}"
+    @OutputSchema("y:bag{t:tuple(len:int,word:chararray)}")
+    public static class ComplexFunc extends EvalFunc<DataBag> {
+        @Override
+        public DataBag exec(Tuple input) throws IOException {
+            return null;
+        }
+    }
+
+    public static class UnannotatedFunc extends EvalFunc<DataBag> {
+        @Override
+        public DataBag exec(Tuple input) throws IOException {
+            return null;
+        }
+    }
+
+    @Test
+    public void testSimpleAnnotation() {
+        EvalFunc<String> myFunc =new AnnotatedFunc();
+        Schema s = new Schema(new FieldSchema("foo", DataType.CHARARRAY));
+        assertEquals(s, myFunc.outputSchema(null));
+    }
+
+    @Test
+    public void testOverriddenAnnotation() {
+        EvalFunc<String> myFunc =new OverriddenFunc();
+        Schema s = new Schema(new FieldSchema("bar", DataType.CHARARRAY));
+        assertEquals(s, myFunc.outputSchema(null));
+    }
+
+    @Test
+    public void testUnannotated() {
+        EvalFunc<DataBag> myFunc = new UnannotatedFunc();
+        assertNull(myFunc.outputSchema(null));
+    }
+
+    @Test
+    public void testComplex() throws FrontendException {
+        EvalFunc<DataBag> myFunc = new ComplexFunc();
+      //  y:bag{t:tuple(len:int,word:chararray)}
+        Schema ts = new Schema(Lists.asList(new FieldSchema("len", DataType.INTEGER),
+                new FieldSchema[] {new FieldSchema("word", DataType.CHARARRAY)}));
+        // Pig silently drops the name of the tuple that the bag holds, since it's more or less invisible.
+       FieldSchema bfs = new FieldSchema(null, ts, DataType.TUPLE);
+       Schema bs = new Schema();
+       bs.add(bfs);
+       Schema s = new Schema();
+       s.add(new FieldSchema("y", bs, DataType.BAG));
+       assertEquals(s, myFunc.outputSchema(null));
+    }
+
+
+}