You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2010/04/24 02:59:20 UTC
svn commit: r937570 - in /hadoop/pig/trunk/contrib: ./
piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/
piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/
Author: gates
Date: Sat Apr 24 00:59:20 2010
New Revision: 937570
URL: http://svn.apache.org/viewvc?rev=937570&view=rev
Log:
PIG-1385 UDF to create tuples and bags.
Added:
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java
Modified:
hadoop/pig/trunk/contrib/CHANGES.txt
Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=937570&r1=937569&r2=937570&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Sat Apr 24 00:59:20 2010
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-1385 UDF to create tuples and bags (hcbusy via gates)
+
PIG-1331 Add Owl as a contrib project (ajaykidave via gates)
OPTIMIZATIONS
Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java?rev=937570&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToBag.java Sat Apr 24 00:59:20 2010
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.piggybank.evaluation.util;
+
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * This class takes a list of items and puts them into a bag
+ *
+ * T = foreach U generate ToBag($0, $1, $2);
+ *
+ * It's like saying this:
+ *
+ * T = foreach U generate {($0), ($1), ($2)}
+ *
+ */
+public class ToBag extends EvalFunc<DataBag> {
+
+ @Override
+ public DataBag exec(Tuple input) throws IOException {
+ try {
+ DataBag bag = BagFactory.getInstance().newDefaultBag();
+
+ for (int i = 0; i < input.size(); ++i) {
+ final Object object = input.get(i);
+ if (object != null) {
+ Tuple tp2 = TupleFactory.getInstance().newTuple(1);
+ tp2.set(0, object);
+ bag.add(tp2);
+ }
+ }
+
+ return bag;
+ } catch (Exception ee) {
+ throw new RuntimeException("Error while creating a bag", ee);
+ }
+ }
+
+}
Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java?rev=937570&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/ToTuple.java Sat Apr 24 00:59:20 2010
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.piggybank.evaluation.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+/**
+ * Eval function that packs its arguments, in order, into a new tuple, e.g.
+ *   T = foreach U generate ToTuple($0, $1, $2);
+ * generates a tuple containing $0, $1, and $2 (null fields included).
+ */
+public class ToTuple extends EvalFunc<Tuple> {
+    /** Copies each input field into a new tuple; failures surface as RuntimeException. */
+    @Override
+    public Tuple exec(Tuple input) throws IOException {
+        try {
+            List<Object> items = new ArrayList<Object>(input.size());
+            for (int i = 0; i < input.size(); ++i) {
+                items.add(input.get(i));
+            }
+            return TupleFactory.getInstance().newTuple(items);
+        } catch (Exception e) {
+            throw new RuntimeException("Error while creating a tuple", e);
+        }
+    }
+
+    /**
+     * Describes the result as one TUPLE field wrapping the input fields.
+     * Returns null if building that schema fails for any reason.
+     */
+    @Override
+    public Schema outputSchema(Schema input) {
+        try {
+            Schema tupleSchema = new Schema();
+            for (int i = 0; i < input.size(); ++i) {
+                tupleSchema.add(input.getField(i));
+            }
+            // Fixed locale so the name is the same under any default locale.
+            String funcName = getClass().getName().toLowerCase(java.util.Locale.ENGLISH);
+            return new Schema(new Schema.FieldSchema(
+                    getSchemaName(funcName, input), tupleSchema, DataType.TUPLE));
+        } catch (Exception e) {
+            return null;
+        }
+    }
+}
Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java?rev=937570&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/TestToBagToTuple.java Sat Apr 24 00:59:20 2010
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.piggybank.test.evaluation.util;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import junit.framework.Assert;
+
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.piggybank.evaluation.util.ToBag;
+import org.apache.pig.piggybank.evaluation.util.ToTuple;
+import org.junit.Test;
+
+/** Unit tests for the ToBag and ToTuple eval functions. */
+public class TestToBagToTuple {
+    private static final int FIELD_COUNT = 100;
+
+    /** Builds a tuple holding the integers 0 .. FIELD_COUNT - 1, in order. */
+    private static Tuple sequentialTuple() {
+        Tuple tuple = TupleFactory.getInstance().newTuple();
+        for (int i = 0; i < FIELD_COUNT; ++i) {
+            tuple.append(i);
+        }
+        return tuple;
+    }
+
+    /** Every input field must show up in the bag exactly once. */
+    @Test
+    public void toBag() throws Exception {
+        DataBag db = new ToBag().exec(sequentialTuple());
+        // The set below hides duplicates, so pin the bag's own size first.
+        Assert.assertEquals((long) FIELD_COUNT, db.size());
+        Set<Integer> seen = new HashSet<Integer>();
+        for (Tuple t : db) {
+            seen.add((Integer) t.get(0));
+        }
+        for (int i = 0; i < FIELD_COUNT; ++i) {
+            Assert.assertTrue(seen.contains(i));
+        }
+    }
+
+    /** The result must equal the input but be a distinct object. */
+    @Test
+    public void toTuple() throws Exception {
+        Tuple input = sequentialTuple();
+        Tuple output = new ToTuple().exec(input);
+        Assert.assertNotSame(input, output);
+        Assert.assertEquals(input, output);
+    }
+}