You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/04/17 18:49:21 UTC

svn commit: r1468996 - in /pig/trunk: ./ src/org/apache/pig/builtin/ src/org/apache/pig/parser/ test/ test/org/apache/pig/builtin/ test/org/apache/pig/test/

Author: cheolsoo
Date: Wed Apr 17 16:49:21 2013
New Revision: 1468996

URL: http://svn.apache.org/r1468996
Log:
PIG-3269: In operator support (cheolsoo)

Added:
    pig/trunk/src/org/apache/pig/builtin/IN.java
    pig/trunk/test/org/apache/pig/builtin/TestInUdf.java
    pig/trunk/test/org/apache/pig/test/TestIn.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/parser/AstPrinter.g
    pig/trunk/src/org/apache/pig/parser/AstValidator.g
    pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
    pig/trunk/src/org/apache/pig/parser/QueryLexer.g
    pig/trunk/src/org/apache/pig/parser/QueryParser.g
    pig/trunk/test/commit-tests

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Apr 17 16:49:21 2013
@@ -28,6 +28,8 @@ PIG-3174:  Remove rpm and deb artifacts 
 
 IMPROVEMENTS
 
+PIG-3269: In operator support (cheolsoo)
+
 PIG-200: Pig Performance Benchmarks (daijy)
 
 PIG-3261: User set PIG_CLASSPATH entries must be prepended to the CLASSPATH, not 

Added: pig/trunk/src/org/apache/pig/builtin/IN.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/IN.java?rev=1468996&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/IN.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/IN.java Wed Apr 17 16:49:21 2013
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+/**
+ * IN EvalFunc mimics the behavior of the SQL IN operator. It takes two or more
+ * arguments and compares the first argument against the rest, one by one, using
+ * equals(). If it finds a match, true is returned; otherwise, false is returned.
+ * If the first argument is null, it always returns false.
+ */
+public class IN extends EvalFunc<Boolean> {
+    @Override
+    public Boolean exec(Tuple input) throws IOException {
+        if (input.size() < 2) {
+            throw new ExecException("Invalid number of args");
+        }
+
+        Object expr = input.get(0);
+        if (expr == null) {
+            // If 1st argument (lhs operand of IN operator) is null, always
+            // return false, even when a null also appears among the rhs operands.
+            return false;
+        }
+
+        for (int i = 1; i < input.size(); i++) {
+            if (expr.equals(input.get(i))) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.BOOLEAN));
+    }
+};

Modified: pig/trunk/src/org/apache/pig/parser/AstPrinter.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstPrinter.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstPrinter.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstPrinter.g Wed Apr 17 16:49:21 2013
@@ -281,10 +281,15 @@ cond
     | ^( NOT { sb.append(" ").append($NOT.text).append(" ("); } cond { sb.append(")"); } )
     | ^( NULL expr { sb.append(" IS "); } (NOT { sb.append($NOT.text).append(" "); } )?  { sb.append($NULL.text); } )
     | ^( rel_op expr { sb.append(" ").append($rel_op.result).append(" "); } expr )
+    | in_eval
     | func_eval
     | ^( BOOL_COND expr )
 ;
 
+in_eval
+    : ^( IN { sb.append(" " + $IN.text + "("); } expr ( { sb.append(", "); } expr )+ { sb.append(") "); } )
+;
+
 func_eval
     : ^( FUNC_EVAL func_name { sb.append("("); } real_arg ( { sb.append(", "); } real_arg)* { sb.append(")"); } )
     | ^( FUNC_EVAL func_name  { sb.append("()"); } )
@@ -670,10 +675,11 @@ eid : rel_str_op
     | LEFT      { sb.append($LEFT.text); }
     | RIGHT     { sb.append($RIGHT.text); }
     | FULL      { sb.append($FULL.text); }
-    | IDENTIFIER    { sb.append($IDENTIFIER.text); }
-    | TOBAG    { sb.append("TOBAG"); }
-    | TOMAP    { sb.append("TOMAP"); }
-    | TOTUPLE    { sb.append("TOTUPLE"); }
+    | IDENTIFIER { sb.append($IDENTIFIER.text); }
+    | TOBAG      { sb.append($TOBAG.text); }
+    | TOMAP      { sb.append($TOMAP.text); }
+    | TOTUPLE    { sb.append($TOTUPLE.text); }
+    | IN         { sb.append($IN.text); }
 ;
 
 // relational operator

Modified: pig/trunk/src/org/apache/pig/parser/AstValidator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstValidator.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstValidator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstValidator.g Wed Apr 17 16:49:21 2013
@@ -359,10 +359,14 @@ cond : ^( OR cond cond )
      | ^( NOT cond )
      | ^( NULL expr NOT? )
      | ^( rel_op expr expr )
+     | in_eval
      | func_eval
      | ^( BOOL_COND expr )
 ;
 
+in_eval: ^( IN expr expr+ )
+;
+
 func_eval: ^( FUNC_EVAL func_name real_arg* )
 ;
 

Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g Wed Apr 17 16:49:21 2013
@@ -766,6 +766,10 @@ cond[LogicalExpressionPlan exprPlan] ret
        $expr = new RegexExpression( $exprPlan, $e1.expr, $e2.expr );
        $expr.setLocation( new SourceLocation( (PigParserNode)$STR_OP_MATCHES ) );
    }
+ | in_eval[$exprPlan]
+   {
+       $expr = $in_eval.expr;
+   }
  | func_eval[$exprPlan]
    {
        $expr = $func_eval.expr;
@@ -777,6 +781,16 @@ cond[LogicalExpressionPlan exprPlan] ret
    }
 ;
 
+in_eval[LogicalExpressionPlan plan] returns[LogicalExpression expr]
+@init {
+    List<LogicalExpression> args = new ArrayList<LogicalExpression>();
+}
+ : ^( IN exp1 = expr[$plan] { args.add( $exp1.expr ); } ( exp2 = expr[$plan] { args.add( $exp2.expr ); } )+ )
+   {
+       SourceLocation loc = new SourceLocation( (PigParserNode)$IN );
+       $expr = builder.buildUDF( loc, $plan, "IN", args );
+   }
+;
 
 func_eval[LogicalExpressionPlan plan] returns[LogicalExpression expr]
 @init {

Modified: pig/trunk/src/org/apache/pig/parser/QueryLexer.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryLexer.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryLexer.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryLexer.g Wed Apr 17 16:49:21 2013
@@ -285,6 +285,9 @@ STR_OP_NE : 'NEQ'
 STR_OP_MATCHES : 'MATCHES'
 ;
 
+IN : 'IN'
+;
+
 TRUE : 'TRUE'
 ;
 

Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Wed Apr 17 16:49:21 2013
@@ -620,6 +620,7 @@ unary_cond
     }
     : exp1 = expr
         ( ( IS NOT? NULL -> ^( NULL $exp1 NOT? ) )
+        | ( IN LEFT_PAREN ( expr ( COMMA expr )* ) RIGHT_PAREN -> ^( IN $exp1 expr+ ) )
         | ( rel_op exp2 = expr -> ^( rel_op $exp1 $exp2 ) )
         | ( -> ^(BOOL_COND expr) ) )
 ;
@@ -973,5 +974,6 @@ rel_str_op : STR_OP_EQ
 
 reserved_identifier_whitelist : RANK
                               | CUBE
+                              | IN
 ;
 

Modified: pig/trunk/test/commit-tests
URL: http://svn.apache.org/viewvc/pig/trunk/test/commit-tests?rev=1468996&r1=1468995&r2=1468996&view=diff
==============================================================================
--- pig/trunk/test/commit-tests (original)
+++ pig/trunk/test/commit-tests Wed Apr 17 16:49:21 2013
@@ -17,6 +17,8 @@
 **/TestFuncSpec.java
 **/TestGTOrEqual.java
 **/TestGreaterThan.java
+**/TestIn.java
+**/TestInUdf.java
 **/TestInputOutputFileValidator.java
 **/TestInstantiateFunc.java
 **/TestLTOrEqual.java

Added: pig/trunk/test/org/apache/pig/builtin/TestInUdf.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestInUdf.java?rev=1468996&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestInUdf.java (added)
+++ pig/trunk/test/org/apache/pig/builtin/TestInUdf.java Wed Apr 17 16:49:21 2013
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.joda.time.DateTime;
+import org.junit.Test;
+
+public class TestInUdf {
+    private static IN in = new IN();
+
+    /**
+     * Verify for several value types that x IN (y) is false and x IN (y, x) is true.
+     * @throws IOException propagated from IN.exec
+     */
+    @Test
+    public void testDataType() throws IOException {
+        Object[][] args = {
+                { new DataByteArray(new byte[] {'0'}),
+                  new DataByteArray(new byte[] {'1'}),
+                },
+                { new Boolean(true),
+                  new Boolean(false),
+                },
+                { new Integer(0),
+                  new Integer(1),
+                },
+                { new Long(0l),
+                  new Long(1l),
+                },
+                { new Float(0f),
+                  new Float(1f),
+                },
+                { new Double(0d),
+                  new Double(1d),
+                },
+                { new DateTime(0l),
+                  new DateTime(1l),
+                },
+                { new String("0"),
+                  new String("1"),
+                },
+                { new BigInteger("0"),
+                  new BigInteger("1"),
+                },
+                { new BigDecimal("0.0"),
+                  new BigDecimal("1.0"),
+                },
+        };
+
+        for (int i = 0; i < args.length; i++) {
+            Tuple input = TupleFactory.getInstance().newTuple();
+            input.append(args[i][0]);
+            input.append(args[i][1]);
+            // x IN (y): the tuple is [x, y], so no match is expected
+            assertFalse(in.exec(input));
+            input.append(args[i][0]);
+            // x IN (y, x): the tuple is now [x, y, x], so a match is expected
+            assertTrue(in.exec(input));
+        }
+    }
+
+    /**
+     * Verify that IN EvalFunc returns false when the first argument is null.
+     * @throws IOException propagated from IN.exec
+     */
+    @Test
+    public void testNull() throws IOException {
+        Tuple input = TupleFactory.getInstance().newTuple();
+        input.append(null);
+        input.append(null);
+        // null IN (null)
+        assertFalse(in.exec(input));
+    }
+
+    /**
+     * Verify that IN EvalFunc throws ExecException when given fewer than two
+     * arguments (here, a single one).
+     * @throws IOException propagated from IN.exec
+     */
+    @Test(expected = ExecException.class)
+    public void testInvalidNumOfArgs() throws IOException {
+        Tuple input = TupleFactory.getInstance().newTuple();
+        input.append(new Object());
+        // x IN ()
+        in.exec(input);
+        fail("IN must throw ExecException for " + input.size() + " argument(s)");
+    }
+}

Added: pig/trunk/test/org/apache/pig/test/TestIn.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestIn.java?rev=1468996&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestIn.java (added)
+++ pig/trunk/test/org/apache/pig/test/TestIn.java Wed Apr 17 16:49:21 2013
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.junit.Test;
+
+public class TestIn {
+
+    /**
+     * Verify that the IN operator works as a FILTER BY condition.
+     * @throws Exception
+     */
+    @Test
+    public void testWithFilter() throws Exception {
+        PigServer pigServer = new PigServer(ExecType.LOCAL);
+        Data data = resetData(pigServer);
+
+        data.set("foo",
+                tuple(1),
+                tuple(2),
+                tuple(3),
+                tuple(4),
+                tuple(5)
+                );
+
+        pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+        pigServer.registerQuery("B = FILTER A BY i IN (1, 2, 3);");
+        pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
+
+        List<Tuple> out = data.get("bar");
+        assertEquals(3, out.size());
+        assertEquals(tuple(1), out.get(0));
+        assertEquals(tuple(2), out.get(1));
+        assertEquals(tuple(3), out.get(2));
+    }
+
+    /**
+     * Verify that the IN operator works as the condition of a bincond (? :).
+     * @throws Exception
+     */
+    @Test
+    public void testWithBincond() throws Exception {
+        PigServer pigServer = new PigServer(ExecType.LOCAL);
+        Data data = resetData(pigServer);
+
+        data.set("foo",
+                tuple(1),
+                tuple(2),
+                tuple(3),
+                tuple(4),
+                tuple(5)
+                );
+
+        pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+        pigServer.registerQuery("B = FOREACH A GENERATE (i IN (1, 3, 5) ? 'ODD' : 'EVEN');");
+        pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
+
+        List<Tuple> out = data.get("bar");
+        assertEquals(5, out.size());
+        assertEquals(tuple("ODD"), out.get(0));
+        assertEquals(tuple("EVEN"), out.get(1));
+        assertEquals(tuple("ODD"), out.get(2));
+        assertEquals(tuple("EVEN"), out.get(3));
+        assertEquals(tuple("ODD"), out.get(4));
+    }
+
+    /**
+     * Verify that the IN operator works as a SPLIT condition with OTHERWISE.
+     * @throws Exception
+     */
+    @Test
+    public void testWithSplit() throws Exception {
+        PigServer pigServer = new PigServer(ExecType.LOCAL);
+        Data data = resetData(pigServer);
+
+        data.set("foo",
+                tuple(1),
+                tuple(2),
+                tuple(3),
+                tuple(4),
+                tuple(5)
+                );
+
+        pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+        pigServer.registerQuery("SPLIT A INTO B IF i IN (1, 3, 5), C OTHERWISE;");
+        pigServer.registerQuery("STORE B INTO 'odd' USING mock.Storage();");
+        pigServer.registerQuery("STORE C INTO 'even' USING mock.Storage();");
+
+        List<Tuple> out = data.get("odd");
+        assertEquals(3, out.size());
+        assertEquals(tuple(1), out.get(0));
+        assertEquals(tuple(3), out.get(1));
+        assertEquals(tuple(5), out.get(2));
+
+        out = data.get("even");
+        assertEquals(2, out.size());
+        assertEquals(tuple(2), out.get(0));
+        assertEquals(tuple(4), out.get(1));
+    }
+
+    /**
+     * Verify that FrontendException is thrown when IN is given no rhs operand.
+     * @throws Exception
+     */
+    @Test(expected = FrontendException.class)
+    public void testMissingRhsOperand() throws Exception {
+        PigServer pigServer = new PigServer(ExecType.LOCAL);
+        Data data = resetData(pigServer);
+
+        data.set("foo",
+                tuple(1),
+                tuple(2),
+                tuple(3),
+                tuple(4),
+                tuple(5)
+                );
+
+        pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
+        pigServer.registerQuery("B = FILTER A BY i IN ();"); // No rhs operand
+        pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
+        fail("FrontendException must be thrown since no rhs operand is given to IN.");
+    }
+}