You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2015/08/03 20:24:31 UTC
[1/2] incubator-datafu git commit: DATAFU-92 Created
TupleFromBag.java file and unit tests cases for TupleFromBag in BagTests.java
Repository: incubator-datafu
Updated Branches:
refs/heads/master d33b5a165 -> 433994c47
DATAFU-92 Created TupleFromBag.java file and unit tests cases for TupleFromBag in BagTests.java
Signed-off-by: Matthew Hayes <ma...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/49928a84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/49928a84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/49928a84
Branch: refs/heads/master
Commit: 49928a8411aa2e75ced803408a4dfc5625253df4
Parents: d33b5a1
Author: Sonali totade <so...@yahoo.com>
Authored: Mon Aug 3 22:51:27 2015 +0530
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Mon Aug 3 10:56:24 2015 -0700
----------------------------------------------------------------------
.../main/java/datafu/pig/bags/TupleFromBag.java | 164 +++++++++++++++++++
.../java/datafu/test/pig/bags/BagTests.java | 66 +++++++-
2 files changed, 229 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/49928a84/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
new file mode 100644
index 0000000..64459f2
--- /dev/null
+++ b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package datafu.pig.bags;
+
+
+import java.io.IOException;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+/**
+ * <p>
+ * This UDF will extract tuple out of bag based on the specified index value of the tuple.
+ * It will have three input parameter
+ * 1. DataBag
+ * 2. Index
+ * 3. Default tuple (Optional)
+ * </p>
+ * <p>
+ * Example:
+ * <pre>
+ *
+ * define TupleFromBag datafu.pig.bags.TupleFromBag();
+ * %declare emptyTuple TOTUPLE(0,'NO_NUMBER')
+ *
+ * data = LOAD 'input' using PigStorage(',') AS (a:INT,b:CHARARRAY);
+ * -- input:
+ * (1,a)
+ * (1,b)
+ * (1,c)
+ * (2,d)
+ * (2,e)
+ * (2,f)
+ * (3,g)
+ * (3,h)
+ * (3,i)
+ *
+ * grouped = GROUP data BY a;
+ *
+ * --output:
+ * {group: int,data: {(a: int,b: chararray)}}
+ * (1,{(1,c),(1,b),(1,a)})
+ * (2,{(2,f),(2,e),(2,d)})
+ * (3,{(3,i),(3,h),(3,g)})
+ *
+ * result1 = FOREACH grouped GENERATE
+ * group AS a,
+ * TupleFromBag(data, 0);
+ *
+ * --output:
+ * {a: int,(a: int,b: chararray)}
+ * (1,(1,c))
+ * (2,(2,f))
+ * (3,(3,i))
+ *
+ *
+ * result2 = FOREACH grouped GENERATE
+ * group AS a,
+ * TupleFromBag(data,0).b as first_b,
+ * TupleFromBag(data,1).b as second_b;
+ *
+ * --output:
+ * {a: int,first_b: chararray,second_b: chararray}
+ * (1,c,b)
+ * (2,f,e)
+ * (3,i,h)
+ *
+ *
+ * result3 = FOREACH grouped GENERATE
+ * group AS a,
+ * TupleFromBag(data,0).b as first_b,
+ * TupleFromBag(data,3).b as forth_b;
+ *
+ * --output:
+ * {a: int,first_b: chararray,forth_b: chararray}
+ * (1,c,)
+ * (2,f,)
+ * (3,i,)
+ *
+ * result4 = FOREACH grouped GENERATE
+ * group AS a,
+ * TupleFromBag(data,0,$emptyTuple).b as first_b,
+ * TupleFromBag(data,3,$emptyTuple).b as forth_b;
+ *
+ * --output:
+ * {a: int,first_b: chararray,forth_b: chararray}
+ * (1,c,NO_NUMBER)
+ * (2,f,NO_NUMBER)
+ * (3,i,NO_NUMBER)
+ *
+ * </pre>
+ * </p>
+ */
+
+public class TupleFromBag extends EvalFunc<Tuple>{
+
+  /**
+   * Returns the tuple found at a given position of a bag.
+   *
+   * <p>Expected fields of {@code tinput}: the bag to read (position 0), the
+   * zero-based index of the wanted tuple (position 1, any {@link Number}) and,
+   * optionally, a default tuple (position 2).</p>
+   *
+   * @param tinput the UDF arguments as described above
+   * @return the tuple at the requested index; when the index is not reached,
+   *         the third argument converted to a tuple if exactly three arguments
+   *         were supplied, otherwise null. Null is also returned when the bag
+   *         or the index is null or cannot be read.
+   * @throws IOException propagated from reading or converting the default tuple
+   */
+  @Override
+  public Tuple exec(Tuple tinput) throws IOException
+  {
+    try {
+      DataBag samples = (DataBag) tinput.get(0);
+      Number requested = (Number) tinput.get(1);
+
+      // Explicit guards instead of relying on a NullPointerException being
+      // swallowed by the catch block below; the result (null) is the same.
+      if (samples == null || requested == null) {
+        return null;
+      }
+
+      int index = requested.intValue();
+
+      // A negative index can never match, so do not scan the bag for it.
+      if (index >= 0) {
+        int tupleIndex = 0;
+        for (Tuple tuple : samples) {
+          if (tupleIndex == index) {
+            return tuple;
+          }
+          tupleIndex++;
+        }
+      }
+    }
+    catch (Exception e) {
+      // Deliberate best effort: arguments that are missing or of an unexpected
+      // type yield a null result instead of failing the whole job.
+      return null;
+    }
+
+    // The index was not reached: fall back to the optional default tuple.
+    if (tinput.size() == 3) {
+      return DataType.toTuple(tinput.get(2));
+    }
+
+    return null;
+  }
+
+  /**
+   * Validates the argument schema and derives the output schema from the
+   * schema attached to the first argument.
+   *
+   * @param input schema of the UDF arguments
+   * @return a schema built from the schema of the first input field
+   * @throws RuntimeException if the input does not have two or three fields,
+   *         if the second field is not an INT, or if a field cannot be read
+   */
+  @Override
+  public Schema outputSchema(Schema input)
+  {
+    try {
+      if (!(input.size() == 2 || input.size() == 3))
+      {
+        throw new RuntimeException("Expected input to have two or three fields");
+      }
+
+      if (input.getField(1).type != DataType.INTEGER ) {
+        throw new RuntimeException("Expected an INT as second input, got: "+input.getField(1).type);
+      }
+
+      return new Schema(input.getField(0).schema);
+    }
+    catch (FrontendException e) {
+      // The cause is chained into the rethrown exception, so it is not also
+      // printed to stderr here.
+      throw new RuntimeException(e);
+    }
+  }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/49928a84/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
index 11753ba..a5851e4 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
@@ -187,9 +187,73 @@ public class BagTests extends PigTests
"(1,{(1),(2),(3),(4)})",
"(2,{(10),(20),(30),(40),(50),(60)})");
}
-
+
/**
+define TupleFromBag datafu.pig.bags.TupleFromBag();
+
+%declare emptyTuple TOTUPLE(0,'NO_NUMBER')
+
+data = LOAD 'input' using PigStorage(',') AS (a:INT,b:CHARARRAY);
+
+grouped = GROUP data BY a;
+
+result1 = FOREACH grouped GENERATE group AS a, TupleFromBag(data, 0);
+
+result2 = FOREACH grouped GENERATE group AS a, TupleFromBag(data,0).b as first_b, TupleFromBag(data,1).b as second_b;
+
+result3 = FOREACH grouped GENERATE group AS a, TupleFromBag(data,0).b as first_b, TupleFromBag(data,3).b as forth_b;
+
+result4 = FOREACH grouped GENERATE group AS a,TupleFromBag(data,0,$emptyTuple).b as first_b, TupleFromBag(data,3,$emptyTuple).b as forth_b;
+
+ **/
+
+ @Multiline
+ private String tupleFromBagTest;
+
+ /**
+  * Runs the Pig script held in the tupleFromBagTest field against nine
+  * "key,value" input lines (keys 1 to 3, three values each) and checks the
+  * four aliases the script produces.
+  */
+ @Test
+ public void tupleFromBagTest() throws Exception
+ {
+ PigTest test = createPigTestFromString(tupleFromBagTest);
+
+ // Three rows per key, so after grouping each bag holds three tuples.
+ writeLinesToFile("input",
+ "1,a",
+ "1,b",
+ "1,c",
+ "2,d",
+ "2,e",
+ "2,f",
+ "3,g",
+ "3,h",
+ "3,i");
+
+ test.runScript();
+
+ // result1: the whole tuple at index 0 of each group.
+ assertOutput(test, "result1",
+ "(1,(1,c))",
+ "(2,(2,f))",
+ "(3,(3,i))");
+
+ // result2: field b of the tuples at index 0 and index 1.
+ assertOutput(test, "result2",
+ "(1,c,b)",
+ "(2,f,e)",
+ "(3,i,h)");
+
+ // result3: index 3 is requested without a default tuple; the expected
+ // value for that column is empty.
+ assertOutput(test, "result3",
+ "(1,c,)",
+ "(2,f,)",
+ "(3,i,)");
+
+ // result4: index 3 is requested with $emptyTuple as the default; the
+ // expected value is the default's 'NO_NUMBER' field.
+ assertOutput(test, "result4",
+ "(1,c,NO_NUMBER)",
+ "(2,f,NO_NUMBER)",
+ "(3,i,NO_NUMBER)");
+
+ }
+
+
+ /**
+
define FirstTupleFromBag datafu.pig.bags.FirstTupleFromBag();
[2/2] incubator-datafu git commit: Minor doc fixes for TupleFromBag
to fix build
Posted by mh...@apache.org.
Minor doc fixes for TupleFromBag to fix build
Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/433994c4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/433994c4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/433994c4
Branch: refs/heads/master
Commit: 433994c470fb3cf9b907ce8b5ce14c5ac8d47e4b
Parents: 49928a8
Author: Matthew Hayes <ma...@gmail.com>
Authored: Mon Aug 3 11:24:17 2015 -0700
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Mon Aug 3 11:24:17 2015 -0700
----------------------------------------------------------------------
.../main/java/datafu/pig/bags/TupleFromBag.java | 22 ++++++++++++--------
1 file changed, 13 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/433994c4/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
index 64459f2..fa13386 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
@@ -29,16 +29,21 @@ import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
* <p>
- * This UDF will extract tuple out of bag based on the specified index value of the tuple.
- * It will have three input parameter
- * 1. DataBag
- * 2. Index
- * 3. Default tuple (Optional)
+ * This UDF will extract a tuple from a bag based on a specified index.
* </p>
* <p>
+ * There are three input parameters:
+ * </p>
+ * <ol>
+ * <li>DataBag</li>
+ * <li>Index</li>
+ * <li>Default tuple (Optional)</li>
+ * </ol>
+ * <p>
* Example:
+ * </p>
* <pre>
- *
+ * {@code
* define TupleFromBag datafu.pig.bags.TupleFromBag();
* %declare emptyTuple TOTUPLE(0,'NO_NUMBER')
*
@@ -106,9 +111,8 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
* (1,c,NO_NUMBER)
* (2,f,NO_NUMBER)
* (3,i,NO_NUMBER)
- *
+ * }
* </pre>
- * </p>
*/
public class TupleFromBag extends EvalFunc<Tuple>{
@@ -161,4 +165,4 @@ public class TupleFromBag extends EvalFunc<Tuple>{
}
}
-}
\ No newline at end of file
+}