You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2015/08/03 20:24:31 UTC

[1/2] incubator-datafu git commit: DATAFU-92 Created TupleFromBag.java file and unit tests cases for TupleFromBag in BagTests.java

Repository: incubator-datafu
Updated Branches:
  refs/heads/master d33b5a165 -> 433994c47


DATAFU-92 Created TupleFromBag.java file and unit tests cases for TupleFromBag in BagTests.java

Signed-off-by: Matthew Hayes <ma...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/49928a84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/49928a84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/49928a84

Branch: refs/heads/master
Commit: 49928a8411aa2e75ced803408a4dfc5625253df4
Parents: d33b5a1
Author: Sonali totade <so...@yahoo.com>
Authored: Mon Aug 3 22:51:27 2015 +0530
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Mon Aug 3 10:56:24 2015 -0700

----------------------------------------------------------------------
 .../main/java/datafu/pig/bags/TupleFromBag.java | 164 +++++++++++++++++++
 .../java/datafu/test/pig/bags/BagTests.java     |  66 +++++++-
 2 files changed, 229 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/49928a84/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
new file mode 100644
index 0000000..64459f2
--- /dev/null
+++ b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package datafu.pig.bags;
+
+
+import java.io.IOException;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+/**
+ * <p>
+ * This UDF will extract tuple out of bag based on the specified index value of the tuple.
+ * It will have three input parameter
+ * 1. DataBag
+ * 2. Index
+ * 3. Default tuple (Optional)
+ * </p>
+ * <p>
+ * Example:
+ * <pre>
+ *
+ * define TupleFromBag datafu.pig.bags.TupleFromBag();
+ * %declare defaultTuple TOTUPLE(0,'NO_NUMBER')
+ *
+ * data = LOAD 'input' using PigStorage(',') AS (a:INT,b:CHARARRAY);
+ * -- input:
+ * (1,a)
+ * (1,b)
+ * (1,c)
+ * (2,d)
+ * (2,e)
+ * (2,f)
+ * (3,g)
+ * (3,h)
+ * (3,i)
+ *
+ * grouped = GROUP data BY a;
+ *
+ * --output:
+ * {group: int,data: {(a: int,b: chararray)}}
+ * (1,{(1,c),(1,b),(1,a)})
+ * (2,{(2,f),(2,e),(2,d)})
+ * (3,{(3,i),(3,h),(3,g)})
+ *
+ * result1 = FOREACH grouped GENERATE
+ *           group AS a,
+ *           TupleFromBag(data, 0);
+ *
+ * --output:
+ * {a: int,(a: int,b: chararray)}
+ * (1,(1,c))
+ * (2,(2,f))
+ * (3,(3,i))
+ *
+ *
+ * result2 = FOREACH grouped GENERATE
+ *           group AS a,
+ *           TupleFromBag(data,0).b as first_b,
+ *           TupleFromBag(data,1).b as second_b;
+ *
+ * --output:
+ * {a: int,first_b: chararray,second_b: chararray}
+ * (1,c,b)
+ * (2,f,e)
+ * (3,i,h)
+ *
+ *
+ * result3 = FOREACH grouped GENERATE
+ *           group AS a,
+ *           TupleFromBag(data,0).b as first_b,
+ *           TupleFromBag(data,3).b as forth_b;
+ * 
+ * --output:
+ * {a: int,first_b: chararray,forth_b: chararray}
+ * (1,c,)
+ * (2,f,)
+ * (3,i,)
+ *
+ * result4 = FOREACH grouped GENERATE
+ *           group AS a,
+ *           TupleFromBag(data,0,$defaultTuple).b as first_b,
+ *           TupleFromBag(data,3,$defaultTuple).b as forth_b;
+ * 
+ * --output:
+ * {a: int,first_b: chararray,forth_b: chararray}
+ * (1,c,NO_NUMBER)
+ * (2,f,NO_NUMBER)
+ * (3,i,NO_NUMBER)
+ * 
+ * </pre>
+ * </p>
+ */
+
+public class TupleFromBag extends EvalFunc<Tuple>{
+
+	/**
+	 * Returns the tuple found at the given zero-based index of the bag.
+	 * @param tinput either (bag, index) or (bag, index, default tuple)
+	 * @return the indexed tuple; if no tuple has that index, the default tuple when a third field is present, else null; null for malformed input
+	 * @throws IOException if a field cannot be read or the third field cannot be converted to a tuple
+	 */
+	@Override
+	public Tuple exec(Tuple tinput) throws IOException {
+		// Malformed input yields null; genuine failures (e.g. while reading the bag) propagate.
+		if (tinput == null || tinput.size() < 2) {
+			return null;
+		}
+		Object bag = tinput.get(0);
+		Object position = tinput.get(1);
+		if (!(bag instanceof DataBag) || !(position instanceof Number)) {
+			return null;
+		}
+		int index = ((Number) position).intValue();
+		int tupleIndex = 0;
+		for (Tuple tuple : (DataBag) bag) {
+			if (tupleIndex++ == index) {
+				return tuple;
+			}
+		}
+		return tinput.size() == 3 ? DataType.toTuple(tinput.get(2)) : null;
+	}
+
+	/**
+	 * Checks for two or three arguments with an INT index; throws RuntimeException otherwise.
+	 * @param input schema of the arguments passed to the UDF
+	 * @return a schema built from the schema of the first argument (the bag)
+	 */
+	@Override
+	public Schema outputSchema(Schema input) {
+		if (input.size() != 2 && input.size() != 3) {
+			throw new RuntimeException("Expected input to have two or three fields");
+		}
+		try {
+			if (input.getField(1).type != DataType.INTEGER ) {
+				throw new RuntimeException("Expected an INT as second input, got: "+input.getField(1).type);
+			}
+			return new Schema(input.getField(0).schema);
+		} catch (FrontendException e) {
+			// The cause travels with the rethrown exception, so nothing is printed here.
+			throw new RuntimeException(e);
+		}
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/49928a84/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
index 11753ba..a5851e4 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
@@ -187,9 +187,73 @@ public class BagTests extends PigTests
                  "(1,{(1),(2),(3),(4)})",
                  "(2,{(10),(20),(30),(40),(50),(60)})");
   }
-
+  
   /**
 
+define TupleFromBag datafu.pig.bags.TupleFromBag();
+
+%declare emptyTuple TOTUPLE(0,'NO_NUMBER')
+
+data = LOAD 'input' using PigStorage(',') AS (a:INT,b:CHARARRAY);
+
+grouped = GROUP data BY a;
+
+result1 = FOREACH grouped GENERATE group AS a, TupleFromBag(data, 0);
+
+result2 = FOREACH grouped GENERATE group AS a, TupleFromBag(data,0).b as first_b, TupleFromBag(data,1).b as second_b;
+
+result3 = FOREACH grouped GENERATE group AS a, TupleFromBag(data,0).b as first_b, TupleFromBag(data,3).b as forth_b;
+
+result4 = FOREACH grouped GENERATE group AS a,TupleFromBag(data,0,$emptyTuple).b as first_b, TupleFromBag(data,3,$emptyTuple).b as forth_b;
+
+   **/
+
+  @Multiline
+  private String tupleFromBagTest;
+
+  @Test
+  public void tupleFromBagTest() throws Exception
+  {
+    PigTest script = createPigTestFromString(tupleFromBagTest);
+    // Input: three rows for each of the keys 1, 2 and 3.
+    writeLinesToFile("input",
+                     "1,a",
+                     "1,b",
+                     "1,c",
+                     "2,d",
+                     "2,e",
+                     "2,f",
+                     "3,g",
+                     "3,h",
+                     "3,i");
+
+    script.runScript();
+    // result1: the whole tuple at index 0 of each group's bag.
+    assertOutput(script, "result1",
+                 "(1,(1,c))",
+                 "(2,(2,f))",
+                 "(3,(3,i))");
+    // result2: field b of the tuples at index 0 and index 1.
+    assertOutput(script, "result2",
+                 "(1,c,b)",
+                 "(2,f,e)",
+                 "(3,i,h)");
+    // result3: index 3 is past the end of every bag and no default is given, so the value is empty.
+    assertOutput(script, "result3",
+                 "(1,c,)",
+                 "(2,f,)",
+                 "(3,i,)");
+    // result4: same out-of-range lookup, but the supplied default tuple provides b.
+    assertOutput(script, "result4",
+                 "(1,c,NO_NUMBER)",
+                 "(2,f,NO_NUMBER)",
+                 "(3,i,NO_NUMBER)");
+
+  }
+
+
+ /**
+
 
   define FirstTupleFromBag datafu.pig.bags.FirstTupleFromBag();
 


[2/2] incubator-datafu git commit: Minor doc fixes for TupleFromBag to fix build

Posted by mh...@apache.org.
Minor doc fixes for TupleFromBag to fix build


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/433994c4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/433994c4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/433994c4

Branch: refs/heads/master
Commit: 433994c470fb3cf9b907ce8b5ce14c5ac8d47e4b
Parents: 49928a8
Author: Matthew Hayes <ma...@gmail.com>
Authored: Mon Aug 3 11:24:17 2015 -0700
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Mon Aug 3 11:24:17 2015 -0700

----------------------------------------------------------------------
 .../main/java/datafu/pig/bags/TupleFromBag.java | 22 ++++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/433994c4/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
index 64459f2..fa13386 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/TupleFromBag.java
@@ -29,16 +29,21 @@ import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 /**
  * <p>
- * This UDF will extract tuple out of bag based on the specified index value of the tuple.
- * It will have three input parameter
- * 1. DataBag
- * 2. Index
- * 3. Default tuple (Optional)
+ * This UDF will extract a tuple from a bag based on a specified index.
  * </p>
  * <p>
+ * There are three input parameters:
+ * </p>
+ * <ol>
+ * <li>DataBag</li>
+ * <li>Index</li>
+ * <li>Default tuple (Optional)</li>
+ * </ol>
+ * <p>
  * Example:
+ * </p>
  * <pre>
- *
+ * {@code
  * define TupleFromBag datafu.pig.bags.TupleFromBag();
  * %declare defaultTuple TOTUPLE(0,'NO_NUMBER')
  *
@@ -106,9 +111,8 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
  * (1,c,NO_NUMBER)
  * (2,f,NO_NUMBER)
  * (3,i,NO_NUMBER)
- * 
+ * } 
  * </pre>
- * </p>
  */
 
 public class TupleFromBag extends EvalFunc<Tuple>{
@@ -161,4 +165,4 @@ public class TupleFromBag extends EvalFunc<Tuple>{
 		}
 	}
 
-}
\ No newline at end of file
+}