You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by dv...@apache.org on 2011/11/14 08:27:34 UTC
svn commit: r1201618 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/EvalFunc.java src/org/apache/pig/builtin/OutputSchema.java
test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java
Author: dvryaboy
Date: Mon Nov 14 07:27:34 2011
New Revision: 1201618
URL: http://svn.apache.org/viewvc?rev=1201618&view=rev
Log:
PIG-2151: Add annotation to specify output schema in Java UDFs (dvryaboy)
Added:
pig/trunk/src/org/apache/pig/builtin/OutputSchema.java
pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/EvalFunc.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1201618&r1=1201617&r2=1201618&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Nov 14 07:27:34 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2151: Add annotation to specify output schema in Java UDFs (dvryaboy)
+
PIG-2230: Improved error message for invalid parameter format (xuitingz via olgan)
PIG-2328: Add builtin UDFs for building and using bloom filters (gates)
Modified: pig/trunk/src/org/apache/pig/EvalFunc.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/EvalFunc.java?rev=1201618&r1=1201617&r2=1201618&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/EvalFunc.java (original)
+++ pig/trunk/src/org/apache/pig/EvalFunc.java Mon Nov 14 07:27:34 2011
@@ -22,12 +22,13 @@ import java.io.IOException;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.util.List;
-import java.util.Properties;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
+import org.apache.pig.builtin.OutputSchema;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.data.Tuple;
@@ -35,9 +36,8 @@ import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.UDFContext;
-import org.apache.pig.LoadPushDown.RequiredFieldList;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
+import org.apache.pig.impl.util.Utils;
+import org.apache.pig.parser.ParserException;
/**
@@ -215,11 +215,20 @@ public abstract class EvalFunc<T> {
* Report the schema of the output of this UDF. Pig will make use of
* this in error checking, optimization, and planning. The schema
* of input data to this UDF is provided.
+ * <p>
+ * The default implementation interprets the {@link OutputSchema} annotation,
+ * if one is present. Otherwise, it returns <code>null</code> (no known output schema).
+ *
* @param input Schema of the input
* @return Schema of the output
*/
public Schema outputSchema(Schema input) {
- return null;
+ OutputSchema schema = this.getClass().getAnnotation(OutputSchema.class);
+ try {
+ return (schema == null) ? null : Utils.getSchemaFromString(schema.value());
+ } catch (ParserException e) {
+ throw new RuntimeException(e);
+ }
}
/**
Added: pig/trunk/src/org/apache/pig/builtin/OutputSchema.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/OutputSchema.java?rev=1201618&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/OutputSchema.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/OutputSchema.java Mon Nov 14 07:27:34 2011
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
+
+/**
+ * An EvalFunc can be annotated with an <code>OutputSchema</code> to
+ * tell Pig what the expected output is. This can be used in place
+ * of {@link EvalFunc#outputSchema(Schema)}.
+ * <p>
+ * The default implementation of {@link EvalFunc#outputSchema(Schema)}
+ * will look at this annotation and return an interpreted schema, if the annotation is present.
+ * <p>
+ * Implementing a custom {@link EvalFunc#outputSchema(Schema)} will
+ * override the annotation (unless you handle it explicitly, for example by calling <code>super.outputSchema(schema)</code>).
+ * <p>
+ * Here's an example of a complex schema declared in an annotation:
+ * <code>@OutputSchema("y:bag{t:tuple(len:int,word:chararray)}")</code>
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+@Documented
+@Retention(value=RetentionPolicy.RUNTIME)
+public @interface OutputSchema {
+ String value();
+}
Added: pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java?rev=1201618&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java (added)
+++ pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java Mon Nov 14 07:27:34 2011
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.test;
+
+import static org.junit.Assert.*;
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.builtin.OutputSchema;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class TestEvalFuncOutputAnnotation {
+
+ @OutputSchema("foo:chararray")
+ public static class AnnotatedFunc extends EvalFunc<String> {
+ @Override
+ public String exec(Tuple input) throws IOException {
+ return null;
+ }
+ }
+
+ @OutputSchema("foo:chararray")
+ public static class OverriddenFunc extends EvalFunc<String> {
+ @Override
+ public String exec(Tuple input) throws IOException {
+ return null;
+ }
+ @Override
+ public Schema outputSchema(Schema input) {
+ return new Schema(new FieldSchema("bar", DataType.CHARARRAY));
+ }
+ }
+
+ // This would give the same result: "y:bag{tuple(len:int,word:chararray)}"
+ @OutputSchema("y:bag{t:tuple(len:int,word:chararray)}")
+ public static class ComplexFunc extends EvalFunc<DataBag> {
+ @Override
+ public DataBag exec(Tuple input) throws IOException {
+ return null;
+ }
+ }
+
+ public static class UnannotatedFunc extends EvalFunc<DataBag> {
+ @Override
+ public DataBag exec(Tuple input) throws IOException {
+ return null;
+ }
+ }
+
+ @Test
+ public void testSimpleAnnotation() {
+ EvalFunc<String> myFunc =new AnnotatedFunc();
+ Schema s = new Schema(new FieldSchema("foo", DataType.CHARARRAY));
+ assertEquals(s, myFunc.outputSchema(null));
+ }
+
+ @Test
+ public void testOverriddenAnnotation() {
+ EvalFunc<String> myFunc =new OverriddenFunc();
+ Schema s = new Schema(new FieldSchema("bar", DataType.CHARARRAY));
+ assertEquals(s, myFunc.outputSchema(null));
+ }
+
+ @Test
+ public void testUnannotated() {
+ EvalFunc<DataBag> myFunc = new UnannotatedFunc();
+ assertNull(myFunc.outputSchema(null));
+ }
+
+ @Test
+ public void testComplex() throws FrontendException {
+ EvalFunc<DataBag> myFunc = new ComplexFunc();
+ // y:bag{t:tuple(len:int,word:chararray)}
+ Schema ts = new Schema(Lists.asList(new FieldSchema("len", DataType.INTEGER),
+ new FieldSchema[] {new FieldSchema("word", DataType.CHARARRAY)}));
+ // Pig silently drops the name of the tuple that a bag holds, since it's more or less invisible.
+ FieldSchema bfs = new FieldSchema(null, ts, DataType.TUPLE);
+ Schema bs = new Schema();
+ bs.add(bfs);
+ Schema s = new Schema();
+ s.add(new FieldSchema("y", bs, DataType.BAG));
+ assertEquals(s, myFunc.outputSchema(null));
+ }
+
+
+}