You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/04/17 18:49:21 UTC
svn commit: r1468996 - in /pig/trunk: ./ src/org/apache/pig/builtin/
src/org/apache/pig/parser/ test/ test/org/apache/pig/builtin/
test/org/apache/pig/test/
Author: cheolsoo
Date: Wed Apr 17 16:49:21 2013
New Revision: 1468996
URL: http://svn.apache.org/r1468996
Log:
PIG-3269: In operator support (cheolsoo)
Added:
pig/trunk/src/org/apache/pig/builtin/IN.java
pig/trunk/test/org/apache/pig/builtin/TestInUdf.java
pig/trunk/test/org/apache/pig/test/TestIn.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/parser/AstPrinter.g
pig/trunk/src/org/apache/pig/parser/AstValidator.g
pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
pig/trunk/src/org/apache/pig/parser/QueryLexer.g
pig/trunk/src/org/apache/pig/parser/QueryParser.g
pig/trunk/test/commit-tests
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Apr 17 16:49:21 2013
@@ -28,6 +28,8 @@ PIG-3174: Remove rpm and deb artifacts
IMPROVEMENTS
+PIG-3269: In operator support (cheolsoo)
+
PIG-200: Pig Performance Benchmarks (daijy)
PIG-3261: User set PIG_CLASSPATH entries must be prepended to the CLASSPATH, not
Added: pig/trunk/src/org/apache/pig/builtin/IN.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/IN.java?rev=1468996&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/IN.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/IN.java Wed Apr 17 16:49:21 2013
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+/**
+ * IN EvalFunc mimics the behavior of the SQL IN operator. It takes two or more
+ * arguments and compares the first argument against each of the others in
+ * turn. If it finds a match, true is returned; otherwise, false is returned.
+ * If the first argument is null, it always returns false.
+ */
+public class IN extends EvalFunc<Boolean> {
+ @Override
+ public Boolean exec(Tuple input) throws IOException {
+ if (input.size() < 2) {
+ throw new ExecException("Invalid number of args");
+ }
+
+ Object expr = input.get(0);
+ if (expr == null) {
+ // If 1st argument (lhs operand of IN operator) is null, always
+ // return false.
+ return false;
+ }
+
+ for (int i = 1; i < input.size(); i++) {
+ if (expr.equals(input.get(i))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public Schema outputSchema(Schema input) {
+ return new Schema(new Schema.FieldSchema(null, DataType.BOOLEAN));
+ }
+};
Modified: pig/trunk/src/org/apache/pig/parser/AstPrinter.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstPrinter.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstPrinter.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstPrinter.g Wed Apr 17 16:49:21 2013
@@ -281,10 +281,15 @@ cond
| ^( NOT { sb.append(" ").append($NOT.text).append(" ("); } cond { sb.append(")"); } )
| ^( NULL expr { sb.append(" IS "); } (NOT { sb.append($NOT.text).append(" "); } )? { sb.append($NULL.text); } )
| ^( rel_op expr { sb.append(" ").append($rel_op.result).append(" "); } expr )
+ | in_eval
| func_eval
| ^( BOOL_COND expr )
;
+in_eval
+ : ^( IN { sb.append(" " + $IN.text + "("); } expr ( { sb.append(", "); } expr )+ { sb.append(") "); } )
+;
+
func_eval
: ^( FUNC_EVAL func_name { sb.append("("); } real_arg ( { sb.append(", "); } real_arg)* { sb.append(")"); } )
| ^( FUNC_EVAL func_name { sb.append("()"); } )
@@ -670,10 +675,11 @@ eid : rel_str_op
| LEFT { sb.append($LEFT.text); }
| RIGHT { sb.append($RIGHT.text); }
| FULL { sb.append($FULL.text); }
- | IDENTIFIER { sb.append($IDENTIFIER.text); }
- | TOBAG { sb.append("TOBAG"); }
- | TOMAP { sb.append("TOMAP"); }
- | TOTUPLE { sb.append("TOTUPLE"); }
+ | IDENTIFIER { sb.append($IDENTIFIER.text); }
+ | TOBAG { sb.append($TOBAG.text); }
+ | TOMAP { sb.append($TOMAP.text); }
+ | TOTUPLE { sb.append($TOTUPLE.text); }
+ | IN { sb.append($IN.text); }
;
// relational operator
Modified: pig/trunk/src/org/apache/pig/parser/AstValidator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstValidator.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstValidator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstValidator.g Wed Apr 17 16:49:21 2013
@@ -359,10 +359,14 @@ cond : ^( OR cond cond )
| ^( NOT cond )
| ^( NULL expr NOT? )
| ^( rel_op expr expr )
+ | in_eval
| func_eval
| ^( BOOL_COND expr )
;
+in_eval: ^( IN expr expr+ )
+;
+
func_eval: ^( FUNC_EVAL func_name real_arg* )
;
Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g Wed Apr 17 16:49:21 2013
@@ -766,6 +766,10 @@ cond[LogicalExpressionPlan exprPlan] ret
$expr = new RegexExpression( $exprPlan, $e1.expr, $e2.expr );
$expr.setLocation( new SourceLocation( (PigParserNode)$STR_OP_MATCHES ) );
}
+ | in_eval[$exprPlan]
+ {
+ $expr = $in_eval.expr;
+ }
| func_eval[$exprPlan]
{
$expr = $func_eval.expr;
@@ -777,6 +781,16 @@ cond[LogicalExpressionPlan exprPlan] ret
}
;
+in_eval[LogicalExpressionPlan plan] returns[LogicalExpression expr]
+@init {
+ List<LogicalExpression> args = new ArrayList<LogicalExpression>();
+}
+ : ^( IN exp1 = expr[$plan] { args.add( $exp1.expr ); } ( exp2 = expr[$plan] { args.add( $exp2.expr ); } )+ )
+ {
+ SourceLocation loc = new SourceLocation( (PigParserNode)$IN );
+ $expr = builder.buildUDF( loc, $plan, "IN", args );
+ }
+;
func_eval[LogicalExpressionPlan plan] returns[LogicalExpression expr]
@init {
Modified: pig/trunk/src/org/apache/pig/parser/QueryLexer.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryLexer.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryLexer.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryLexer.g Wed Apr 17 16:49:21 2013
@@ -285,6 +285,9 @@ STR_OP_NE : 'NEQ'
STR_OP_MATCHES : 'MATCHES'
;
+IN : 'IN'
+;
+
TRUE : 'TRUE'
;
Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Wed Apr 17 16:49:21 2013
@@ -620,6 +620,7 @@ unary_cond
}
: exp1 = expr
( ( IS NOT? NULL -> ^( NULL $exp1 NOT? ) )
+ | ( IN LEFT_PAREN ( expr ( COMMA expr )* ) RIGHT_PAREN -> ^( IN $exp1 expr+ ) )
| ( rel_op exp2 = expr -> ^( rel_op $exp1 $exp2 ) )
| ( -> ^(BOOL_COND expr) ) )
;
@@ -973,5 +974,6 @@ rel_str_op : STR_OP_EQ
reserved_identifier_whitelist : RANK
| CUBE
+ | IN
;
Modified: pig/trunk/test/commit-tests
URL: http://svn.apache.org/viewvc/pig/trunk/test/commit-tests?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/test/commit-tests (original)
+++ pig/trunk/test/commit-tests Wed Apr 17 16:49:21 2013
@@ -17,6 +17,8 @@
**/TestFuncSpec.java
**/TestGTOrEqual.java
**/TestGreaterThan.java
+**/TestIn.java
+**/TestInUdf.java
**/TestInputOutputFileValidator.java
**/TestInstantiateFunc.java
**/TestLTOrEqual.java
Added: pig/trunk/test/org/apache/pig/builtin/TestInUdf.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestInUdf.java?rev=1468996&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestInUdf.java (added)
+++ pig/trunk/test/org/apache/pig/builtin/TestInUdf.java Wed Apr 17 16:49:21 2013
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.joda.time.DateTime;
+import org.junit.Test;
+
+public class TestInUdf {
+ private static IN in = new IN();
+
+ /**
+ * Verify that IN EvalFunc works with various types of arguments.
+ * @throws IOException
+ */
+ @Test
+ public void testDataType() throws IOException {
+ Object[][] args = {
+ { new DataByteArray(new byte[] {'0'}),
+ new DataByteArray(new byte[] {'1'}),
+ },
+ { new Boolean(true),
+ new Boolean(false),
+ },
+ { new Integer(0),
+ new Integer(1),
+ },
+ { new Long(0l),
+ new Long(1l),
+ },
+ { new Float(0f),
+ new Float(1f),
+ },
+ { new Double(0d),
+ new Double(1d),
+ },
+ { new DateTime(0l),
+ new DateTime(1l),
+ },
+ { new String("0"),
+ new String("1"),
+ },
+ { new BigInteger("0"),
+ new BigInteger("1"),
+ },
+ { new BigDecimal("0.0"),
+ new BigDecimal("1.0"),
+ },
+ };
+
+ for (int i = 0; i < args.length; i++) {
+ Tuple input = TupleFactory.getInstance().newTuple();
+ input.append(args[i][0]);
+ input.append(args[i][1]);
+ // x IN (y)
+ assertFalse(in.exec(input));
+ input.append(args[i][0]);
+ // x IN (x, y)
+ assertTrue(in.exec(input));
+ }
+ }
+
+ /**
+ * Verify that IN EvalFunc returns false when the first argument is null.
+ * @throws IOException
+ */
+ @Test
+ public void testNull() throws IOException {
+ Tuple input = TupleFactory.getInstance().newTuple();
+ input.append(null);
+ input.append(null);
+ // null IN (null)
+ assertFalse(in.exec(input));
+ }
+
+ /**
+ * Verify that IN EvalFunc throws ExecException with an invalid number of
+ * arguments.
+ * @throws IOException
+ */
+ @Test(expected = ExecException.class)
+ public void testInvalidNumOfArgs() throws IOException {
+ Tuple input = TupleFactory.getInstance().newTuple();
+ input.append(new Object());
+ // x IN ()
+ in.exec(input);
+ fail("IN must throw ExecException for " + input.size() + " argument(s)");
+ }
+}
Added: pig/trunk/test/org/apache/pig/test/TestIn.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestIn.java?rev=1468996&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestIn.java (added)
+++ pig/trunk/test/org/apache/pig/test/TestIn.java Wed Apr 17 16:49:21 2013
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.junit.Test;
+
+public class TestIn {
+
+ /**
+ * Verify that IN operator works with FILTER BY.
+ * @throws Exception
+ */
+ @Test
+ public void testWithFilter() throws Exception {
+ PigServer pigServer = new PigServer(ExecType.LOCAL);
+ Data data = resetData(pigServer);
+
+ data.set("foo",
+ tuple(1),
+ tuple(2),
+ tuple(3),
+ tuple(4),
+ tuple(5)
+ );
+
+ pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+ pigServer.registerQuery("B = FILTER A BY i IN (1, 2, 3);");
+ pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
+
+ List<Tuple> out = data.get("bar");
+ assertEquals(3, out.size());
+ assertEquals(tuple(1), out.get(0));
+ assertEquals(tuple(2), out.get(1));
+ assertEquals(tuple(3), out.get(2));
+ }
+
+ /**
+ * Verify that IN operator works with the bincond (? :) operator.
+ * @throws Exception
+ */
+ @Test
+ public void testWithBincond() throws Exception {
+ PigServer pigServer = new PigServer(ExecType.LOCAL);
+ Data data = resetData(pigServer);
+
+ data.set("foo",
+ tuple(1),
+ tuple(2),
+ tuple(3),
+ tuple(4),
+ tuple(5)
+ );
+
+ pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+ pigServer.registerQuery("B = FOREACH A GENERATE (i IN (1, 3, 5) ? 'ODD' : 'EVEN');");
+ pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
+
+ List<Tuple> out = data.get("bar");
+ assertEquals(5, out.size());
+ assertEquals(tuple("ODD"), out.get(0));
+ assertEquals(tuple("EVEN"), out.get(1));
+ assertEquals(tuple("ODD"), out.get(2));
+ assertEquals(tuple("EVEN"), out.get(3));
+ assertEquals(tuple("ODD"), out.get(4));
+ }
+
+ /**
+ * Verify that IN operator works with SPLIT.
+ * @throws Exception
+ */
+ @Test
+ public void testWithSplit() throws Exception {
+ PigServer pigServer = new PigServer(ExecType.LOCAL);
+ Data data = resetData(pigServer);
+
+ data.set("foo",
+ tuple(1),
+ tuple(2),
+ tuple(3),
+ tuple(4),
+ tuple(5)
+ );
+
+ pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+ pigServer.registerQuery("SPLIT A INTO B IF i IN (1, 3, 5), C OTHERWISE;");
+ pigServer.registerQuery("STORE B INTO 'odd' USING mock.Storage();");
+ pigServer.registerQuery("STORE C INTO 'even' USING mock.Storage();");
+
+ List<Tuple> out = data.get("odd");
+ assertEquals(3, out.size());
+ assertEquals(tuple(1), out.get(0));
+ assertEquals(tuple(3), out.get(1));
+ assertEquals(tuple(5), out.get(2));
+
+ out = data.get("even");
+ assertEquals(2, out.size());
+ assertEquals(tuple(2), out.get(0));
+ assertEquals(tuple(4), out.get(1));
+ }
+
+ /**
+ * Verify that IN operator throws FrontendException when no rhs operand is given.
+ * @throws Exception
+ */
+ @Test(expected = FrontendException.class)
+ public void testMissingRhsOperand() throws Exception {
+ PigServer pigServer = new PigServer(ExecType.LOCAL);
+ Data data = resetData(pigServer);
+
+ data.set("foo",
+ tuple(1),
+ tuple(2),
+ tuple(3),
+ tuple(4),
+ tuple(5)
+ );
+
+ pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+ pigServer.registerQuery("B = FILTER A BY i IN ();"); // No operand
+ pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
+ fail("FrontendException must be thrown since no rhs operand is given to IN.");
+ }
+}