You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by ju...@apache.org on 2018/12/10 05:18:41 UTC
[1/2] bigtop git commit: BIGTOP-3086: Drop datafu packaging
Repository: bigtop
Updated Branches:
refs/heads/master aaffc1e2c -> 4cee56bdc
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTests.java
deleted file mode 100644
index 11d31bd..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTests.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.linkanalysis;
-
-
-import static org.junit.Assert.*;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-
-import org.apache.bigtop.itest.datafu.linkanalysis.PageRankTest;
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class PageRankTests extends PigTests
-{
- @Test
- public void pigPageRankTest() throws Exception
- {
- PigTest test = createPigTest("datafu/linkanalysis/pageRankTest.pig");
-
- String[] edges = PageRankTest.getWikiExampleEdges();
-
- Map<String,Integer> nodeIds = new HashMap<String,Integer>();
- Map<Integer,String> nodeIdsReversed = new HashMap<Integer,String>();
- Map<String,Float> expectedRanks = PageRankTest.parseExpectedRanks(PageRankTest.getWikiExampleExpectedRanks());
-
- File f = new File(System.getProperty("user.dir"), "input").getAbsoluteFile();
- if (f.exists())
- {
- f.delete();
- }
-
- FileWriter writer = new FileWriter(f);
- BufferedWriter bufferedWriter = new BufferedWriter(writer);
-
- for (String edge : edges)
- {
- String[] edgeParts = edge.split(" ");
- String source = edgeParts[0];
- String dest = edgeParts[1];
- if (!nodeIds.containsKey(source))
- {
- int id = nodeIds.size();
- nodeIds.put(source,id);
- nodeIdsReversed.put(id, source);
- }
- if (!nodeIds.containsKey(dest))
- {
- int id = nodeIds.size();
- nodeIds.put(dest,id);
- nodeIdsReversed.put(id, dest);
- }
- Integer sourceId = nodeIds.get(source);
- Integer destId = nodeIds.get(dest);
-
- StringBuffer sb = new StringBuffer();
-
- sb.append("1\t"); // topic
- sb.append(sourceId.toString() + "\t");
- sb.append(destId.toString() + "\t");
- sb.append("1.0\n"); // weight
-
- bufferedWriter.write(sb.toString());
- }
-
- bufferedWriter.close();
-
- test.runScript();
- Iterator<Tuple> tuples = test.getAlias("data_grouped3");
-
- System.out.println("Final node ranks:");
- int nodeCount = 0;
- while (tuples.hasNext())
- {
- Tuple nodeTuple = tuples.next();
-
- Integer topic = (Integer)nodeTuple.get(0);
- Integer nodeId = (Integer)nodeTuple.get(1);
- Float nodeRank = (Float)nodeTuple.get(2);
-
- assertEquals(1, topic.intValue());
-
- System.out.println(String.format("%d => %f", nodeId, nodeRank));
-
- Float expectedNodeRank = expectedRanks.get(nodeIdsReversed.get(nodeId));
-
- assertTrue(String.format("expected: %f, actual: %f", expectedNodeRank, nodeRank),
- Math.abs(expectedNodeRank - nodeRank * 100.0f) < 0.1);
-
- nodeCount++;
- }
-
- assertEquals(nodeIds.size(),nodeCount);
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/numbers/NumberTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/numbers/NumberTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/numbers/NumberTests.java
deleted file mode 100644
index 4408a55..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/numbers/NumberTests.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.numbers;
-
-import static org.junit.Assert.*;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class NumberTests extends PigTests
-{
- /**
- * Test the RandomIntRange UDF. The main purpose is to make sure it can be used in a Pig script.
- * Also the range of output values is tested.
- *
- * @throws Exception
- */
- @Test
- public void randomIntRangeTest() throws Exception
- {
- PigTest test = createPigTest("datafu/numbers/randomIntRangeTest.pig",
- "MIN=1", "MAX=10");
-
- List<String> input = new ArrayList<String>();
- for (int i=0; i<100; i++)
- {
- input.add(String.format("(%d)", i));
- }
-
- writeLinesToFile("input",
- input.toArray(new String[0]));
-
- test.runScript();
-
- List<Tuple> tuples = getLinesForAlias(test, "data2", false);
- for (Tuple tuple : tuples)
- {
- Integer randValue = (Integer)tuple.get(1);
- assertTrue(randValue >= 1);
- assertTrue(randValue <= 10);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/sessions/SessionTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/sessions/SessionTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/sessions/SessionTests.java
deleted file mode 100644
index d13f1c3..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/sessions/SessionTests.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.sessions;
-
-import static org.junit.Assert.*;
-
-import java.util.HashMap;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class SessionTests extends PigTests
-{
- @Test
- public void sessionizeTest() throws Exception
- {
- PigTest test = createPigTest("datafu/sessions/sessionizeTest.pig",
- "TIME_WINDOW=30m",
- "JAR_PATH=" + getJarPath());
-
- this.writeLinesToFile("input",
- "2010-01-01T01:00:00Z\t1\t10",
- "2010-01-01T01:15:00Z\t1\t20",
- "2010-01-01T01:31:00Z\t1\t10",
- "2010-01-01T01:35:00Z\t1\t20",
- "2010-01-01T02:30:00Z\t1\t30",
-
- "2010-01-01T01:00:00Z\t2\t10",
- "2010-01-01T01:31:00Z\t2\t20",
- "2010-01-01T02:10:00Z\t2\t30",
- "2010-01-01T02:40:30Z\t2\t40",
- "2010-01-01T03:30:00Z\t2\t50",
-
- "2010-01-01T01:00:00Z\t3\t10",
- "2010-01-01T01:01:00Z\t3\t20",
- "2010-01-01T01:02:00Z\t3\t5",
- "2010-01-01T01:10:00Z\t3\t25",
- "2010-01-01T01:15:00Z\t3\t50",
- "2010-01-01T01:25:00Z\t3\t30",
- "2010-01-01T01:30:00Z\t3\t15");
-
- test.runScript();
-
- HashMap<Integer,HashMap<Integer,Boolean>> userValues = new HashMap<Integer,HashMap<Integer,Boolean>>();
-
- for (Tuple t : this.getLinesForAlias(test, "max_value"))
- {
- Integer userId = (Integer)t.get(0);
- Integer max = (Integer)t.get(1);
- if (!userValues.containsKey(userId))
- {
- userValues.put(userId, new HashMap<Integer,Boolean>());
- }
- userValues.get(userId).put(max, true);
- }
-
- assertEquals(2, userValues.get(1).size());
- assertEquals(5, userValues.get(2).size());
- assertEquals(1, userValues.get(3).size());
-
- assertTrue(userValues.get(1).containsKey(20));
- assertTrue(userValues.get(1).containsKey(30));
-
- assertTrue(userValues.get(2).containsKey(10));
- assertTrue(userValues.get(2).containsKey(20));
- assertTrue(userValues.get(2).containsKey(30));
- assertTrue(userValues.get(2).containsKey(40));
- assertTrue(userValues.get(2).containsKey(50));
-
- assertTrue(userValues.get(3).containsKey(50));
- }
-}
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/MarkovPairTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/MarkovPairTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/MarkovPairTests.java
deleted file mode 100644
index f1f1c2f..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/MarkovPairTests.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.stats;
-
-import static org.junit.Assert.*;
-
-import java.util.Iterator;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class MarkovPairTests extends PigTests
-{
- @Test
- public void markovPairDefaultTest() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/markovPairDefault.pig",
- "schema=(data: bag {t: tuple(val:int)})");
-
- writeLinesToFile("input", "{(10),(20),(30),(40),(50),(60)}");
-
- String[] expectedOutput = {
- "({((10),(20)),((20),(30)),((30),(40)),((40),(50)),((50),(60))})"
- };
-
- test.runScript();
-
- Iterator<Tuple> actualOutput = test.getAlias("data_out");
-
- assertTuplesMatch(expectedOutput, actualOutput);
- }
-
- @Test
- public void markovPairMultipleInput() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/markovPairDefault.pig",
- "schema=(data: bag {t: tuple(val1:int,val2:int)})");
-
- writeLinesToFile("input", "{(10,100),(20,200),(30,300),(40,400),(50,500),(60,600)}");
-
- String[] expectedOutput = {
- "({((10,100),(20,200)),((20,200),(30,300)),((30,300),(40,400)),((40,400),(50,500)),((50,500),(60,600))})"
- };
-
-
- test.runScript();
-
- Iterator<Tuple> actualOutput = test.getAlias("data_out");
-
- assertTuplesMatch(expectedOutput, actualOutput);
- }
-
- @Test
- public void markovPairLookaheadTest() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/markovPairLookahead.pig",
- "schema=(data: bag {t: tuple(val:int)})",
- "lookahead=3");
-
- writeLinesToFile("input", "{(10),(20),(30),(40),(50)}");
-
- String[] expectedOutput = {
- "({((10),(20)),((10),(30)),((10),(40)),((20),(30)),((20),(40)),((20),(50)),((30),(40)),((30),(50)),((40),(50))})"
- };
-
- test.runScript();
-
- Iterator<Tuple> actualOutput = test.getAlias("data_out");
-
- assertTuplesMatch(expectedOutput, actualOutput);
- }
-
- private void assertTuplesMatch(String[] expectedOutput, Iterator<Tuple> actualOutput)
- {
- Iterator<Tuple> tuples = actualOutput;
-
- for (String outputLine : expectedOutput)
- {
- assertTrue(tuples.hasNext());
- Tuple outputTuple = tuples.next();
- System.out.println(String.format("expected: %s", outputLine));
- System.out.println(String.format("actual: %s", outputTuple.toString()));
- assertEquals(outputLine,outputTuple.toString());
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/QuantileTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/QuantileTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/QuantileTests.java
deleted file mode 100644
index e9ef05d..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/QuantileTests.java
+++ /dev/null
@@ -1,196 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.stats;
-
-import static org.junit.Assert.*;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class QuantileTests extends PigTests
-{
- @Test
- public void quantileTest() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/quantileTest.pig",
- "QUANTILES='0.0','0.25','0.5','0.75','1.0'");
-
- String[] input = {"1","2","3","4","10","5","6","7","8","9"};
- writeLinesToFile("input", input);
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(1.0,3.0,5.5,8.0,10.0)", output.get(0).toString());
- }
-
- @Test
- public void quantile2Test() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/quantileTest.pig",
- "QUANTILES='5'");
-
- String[] input = {"1","2","3","4","10","5","6","7","8","9"};
- writeLinesToFile("input", input);
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(1.0,3.0,5.5,8.0,10.0)", output.get(0).toString());
- }
-
- @Test
- public void medianTest() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/medianTest.pig");
-
- String[] input = {"4","5","6","9","10","7","8","2","3","1"};
- writeLinesToFile("input", input);
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(5.5)", output.get(0).toString());
- }
-
- @Test
- public void streamingMedianTest() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/streamingMedianTest.pig");
-
- String[] input = {"0","4","5","6","9","10","7","8","2","3","1"};
- writeLinesToFile("input", input);
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(5.0)", output.get(0).toString());
- }
-
- @Test
- public void streamingQuantileTest() throws Exception {
- PigTest test = createPigTest("datafu/stats/streamingQuantileTest.pig",
- "QUANTILES='5'");
-
- String[] input = {"1","2","3","4","10","5","6","7","8","9"};
- writeLinesToFile("input", input);
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(1.0,3.0,5.0,8.0,10.0)", output.get(0).toString());
- }
-
- @Test
- public void streamingQuantile2Test() throws Exception {
- PigTest test = createPigTest("datafu/stats/streamingQuantileTest.pig",
- "QUANTILES='0.5','0.75','1.0'");
-
- String[] input = {"1","2","3","4","10","5","6","7","8","9"};
- writeLinesToFile("input", input);
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(5.0,8.0,10.0)", output.get(0).toString());
- }
-
- @Test
- public void streamingQuantile3Test() throws Exception {
- PigTest test = createPigTest("datafu/stats/streamingQuantileTest.pig",
- "QUANTILES='0.07','0.03','0.37','1.0','0.0'");
-
- List<String> input = new ArrayList<String>();
- for (int i=1000; i>=1; i--)
- {
- input.add(Integer.toString(i));
- }
-
- writeLinesToFile("input", input.toArray(new String[0]));
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(70.0,30.0,370.0,1000.0,1.0)", output.get(0).toString());
- }
-
- @Test
- public void streamingQuantile4Test() throws Exception {
- PigTest test = createPigTest("datafu/stats/streamingQuantileTest.pig",
- "QUANTILES='0.0013','0.0228','0.1587','0.5','0.8413','0.9772','0.9987'");
-
- List<String> input = new ArrayList<String>();
- for (int i=100000; i>=0; i--)
- {
- input.add(Integer.toString(i));
- }
-
- writeLinesToFile("input", input.toArray(new String[0]));
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(130.0,2280.0,15870.0,50000.0,84130.0,97720.0,99870.0)", output.get(0).toString());
- }
-
-
-
- @Test
- public void quantile3Test() throws Exception {
- PigTest test = createPigTest("datafu/stats/quantileTest.pig",
- "QUANTILES='0.0013','0.0228','0.1587','0.5','0.8413','0.9772','0.9987'");
-
- List<String> input = new ArrayList<String>();
- for (int i=100000; i>=0; i--)
- {
- input.add(Integer.toString(i));
- }
-
- writeLinesToFile("input", input.toArray(new String[0]));
-
- test.runScript();
-
- List<Tuple> output = getLinesForAlias(test, "data_out", true);
-
- assertEquals(1,output.size());
- assertEquals("(130.0,2280.0,15870.0,50000.0,84130.0,97720.0,99870.0)", output.get(0).toString());
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/WilsonBinConfTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/WilsonBinConfTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/WilsonBinConfTests.java
deleted file mode 100644
index cb43ce1..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/stats/WilsonBinConfTests.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.stats;
-
-import static org.junit.Assert.*;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class WilsonBinConfTests extends PigTests
-{
- @Test
- public void wilsonTest() throws Exception
- {
- PigTest test = createPigTest("datafu/stats/wilsonBinConfTests.pig",
- "alpha=0.05"); // alpha is 0.05 for 95% confidence
-
- writeLinesToFile("input",
- "1\t1",
- "1\t2",
- "50\t100",
- "500\t1000",
- "999\t1000",
- "1000\t1000",
- "998\t1000");
-
- test.runScript();
-
- /* Add expected values, computed using R:
- *
- * e.g.
- *
- * library(Hmisc)
- *
- * binconf(50,100)
- * binconf(500,1000)
- *
- */
- List<String> expectedOutput = new ArrayList<String>();
- expectedOutput.add("0.05129,1.00000");
- expectedOutput.add("0.02565,0.97435");
- expectedOutput.add("0.40383,0.59617");
- expectedOutput.add("0.46907,0.53093");
- expectedOutput.add("0.99436,0.99995");
- expectedOutput.add("0.99617,1.00000");
- expectedOutput.add("0.99274,0.99945");
-
- List<Tuple> output = this.getLinesForAlias(test, "data_out");
- Iterator<String> expectationIterator = expectedOutput.iterator();
- for (Tuple t : output)
- {
- assertTrue(expectationIterator.hasNext());
- Double lower = (Double)t.get(0);
- Double upper = (Double)t.get(1);
- assertEquals(expectationIterator.next(),String.format("%.5f,%.5f",lower,upper));
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/urls/UserAgentTest.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/urls/UserAgentTest.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/urls/UserAgentTest.java
deleted file mode 100644
index e742c0d..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/urls/UserAgentTest.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.urls;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class UserAgentTest extends PigTests
-{
-
- @Test
- public void userAgentTest() throws Exception
- {
- PigTest test = createPigTest("datafu/urls/userAgentTest.pig");
-
- String[] input = {
- "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
- "Mozilla/5.0 (compatible; Konqueror/3.5; Linux; X11; de) KHTML/3.5.2 (like Gecko) Kubuntu 6.06 Dapper",
- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.2a1pre) Gecko/20110331 Firefox/4.2a1pre Fennec/4.1a1pre",
- "Opera/9.00 (X11; Linux i686; U; en)",
- "Wget/1.10.2",
- "Opera/9.80 (Android; Linux; Opera Mobi/ADR-1012221546; U; pl) Presto/2.7.60 Version/10.5",
- "Mozilla/5.0 (Linux; U; Android 2.2; en-us; DROID2 Build/VZW) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
- };
-
- String[] output = {
- "(mobile)",
- "(desktop)",
- "(mobile)",
- "(desktop)",
- "(desktop)",
- "(mobile)",
- "(mobile)",
- };
-
- test.assertOutput("data",input,"data_out",output);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/AssertTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/AssertTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/AssertTests.java
deleted file mode 100644
index 0379ae7..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/AssertTests.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.util;
-
-import static org.junit.Assert.*;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class AssertTests extends PigTests
-{
- @Test
- public void shouldAssertWithMessageOnZero() throws Exception
- {
- try
- {
- PigTest test = createPigTest("datafu/util/assertWithMessageTest.pig");
-
- this.writeLinesToFile("input", "0");
-
- test.runScript();
-
- this.getLinesForAlias(test, "data2");
-
- fail("test should have failed, but it didn't");
- }
- catch (Exception e)
- {
- }
- }
-
- @Test
- public void shouldNotAssertWithMessageOnOne() throws Exception
- {
- PigTest test = createPigTest("datafu/util/assertWithMessageTest.pig");
-
- this.writeLinesToFile("input", "1");
-
- test.runScript();
-
- this.getLinesForAlias(test, "data2");
- }
-
- @Test
- public void shouldAssertWithoutMessageOnZero() throws Exception
- {
- try
- {
- PigTest test = createPigTest("datafu/util/assertWithoutMessageTest.pig");
-
- this.writeLinesToFile("input", "0");
-
- test.runScript();
-
- this.getLinesForAlias(test, "data2");
-
- fail("test should have failed, but it didn't");
- }
- catch (Exception e)
- {
- }
- }
-
- @Test
- public void shouldNotAssertWithoutMessageOnOne() throws Exception
- {
- PigTest test = createPigTest("datafu/util/assertWithoutMessageTest.pig");
-
- this.writeLinesToFile("input", "1");
-
- test.runScript();
-
- this.getLinesForAlias(test, "data2");
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java
deleted file mode 100644
index 2653060..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/util/IntBoolConversionPigTests.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.util;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class IntBoolConversionPigTests extends PigTests
-{
- @Test
- public void intToBoolTest() throws Exception
- {
- PigTest test = createPigTest("datafu/util/intToBoolTest.pig");
-
- String[] input = {
- "", // null
- "0",
- "1"
- };
-
- String[] output = {
- "(false)",
- "(false)",
- "(true)"
- };
-
- test.assertOutput("data",input,"data2",output);
- }
-
- @Test
- public void intToBoolToIntTest() throws Exception
- {
- PigTest test = createPigTest("datafu/util/intToBoolToIntTest.pig");
-
- String[] input = {
- "", // null
- "0",
- "1",
- "2",
- "-1",
- "-2",
- "0",
- ""
- };
-
- String[] output = {
- "(0)",
- "(0)",
- "(1)",
- "(1)",
- "(1)",
- "(1)",
- "(0)",
- "(0)"
- };
-
- test.assertOutput("data",input,"data3",output);
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig
deleted file mode 100644
index 247c832..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/aliasBagFieldsTest.pig
+++ /dev/null
@@ -1,20 +0,0 @@
-register $JAR_PATH
-
-define AliasBagFields datafu.pig.bags.AliasBagFields('[a#alpha,b#numeric]');
-
-data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:INT, c:INT)});
-
-data2 = FOREACH data GENERATE AliasBagFields(data) as data;
-
-describe data2;
-
-data3 = FOREACH data2 GENERATE FLATTEN(data);
-
-describe data3;
-
-data4 = FOREACH data3 GENERATE data::alpha, data::numeric;
-
-describe data4;
-
-STORE data4 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig
deleted file mode 100644
index d906bc4..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/appendToBagTest.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define AppendToBag datafu.pig.bags.AppendToBag();
-
-data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}, T: tuple(v:INT));
-
-data2 = FOREACH data GENERATE key, AppendToBag(B,T) as B;
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig
deleted file mode 100644
index 30d46a0..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagConcatTest.pig
+++ /dev/null
@@ -1,11 +0,0 @@
-register $JAR_PATH
-
-define BagConcat datafu.pig.bags.BagConcat();
-
-data = LOAD 'input' AS (A: bag{T: tuple(v:INT)}, B: bag{T: tuple(v:INT)}, C: bag{T: tuple(v:INT)});
-
-data2 = FOREACH data GENERATE BagConcat(A,B,C);
-
-describe data2
-
-STORE data2 INTO 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig
deleted file mode 100644
index ee4f538..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitTest.pig
+++ /dev/null
@@ -1,14 +0,0 @@
-register $JAR_PATH
-
-define BagSplit datafu.pig.bags.BagSplit();
-
-data = LOAD 'input' AS (B:bag{T:tuple(val1:INT,val2:INT)});
-
-data2 = FOREACH data GENERATE BagSplit($MAX,B);
-describe data2;
-
-data3 = FOREACH data2 GENERATE FLATTEN($0);
-
-describe data3
-
-STORE data3 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig
deleted file mode 100644
index 833e912..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/bagSplitWithBagNumTest.pig
+++ /dev/null
@@ -1,11 +0,0 @@
-register $JAR_PATH
-
-define BagSplit datafu.pig.bags.BagSplit('true');
-
-data = LOAD 'input' AS (B:bag{T:tuple(val1:INT,val2:INT)});
-
-data2 = FOREACH data GENERATE BagSplit($MAX,B);
-
-data3 = FOREACH data2 GENERATE FLATTEN($0);
-
-STORE data3 INTO 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig
deleted file mode 100644
index 88d7392..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/comprehensiveBagSplitAndEnumerate.pig
+++ /dev/null
@@ -1,26 +0,0 @@
-register $JAR_PATH
-
-define BagSplit datafu.pig.bags.BagSplit();
-define Enumerate datafu.pig.bags.Enumerate('1');
-
-data = LOAD 'input' AS (data: bag {T: tuple(name:CHARARRAY, score:double)});
-
-data2 = FOREACH data GENERATE BagSplit(3,data) as the_bags;
-
-describe data2
-
-data3 = FOREACH data2 GENERATE Enumerate(the_bags) as enumerated_bags;
-
-describe data3
-
-data4 = FOREACH data3 GENERATE FLATTEN(enumerated_bags) as (data,i);
-
-describe data4
-
-data5 = FOREACH data4 GENERATE data as the_data, i as the_key;
-
-describe data5
-
-data_out = FOREACH data5 GENERATE FLATTEN(the_data), the_key;
-
-describe data_out
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig
deleted file mode 100644
index 9532d07..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/distinctByTest.pig
+++ /dev/null
@@ -1,12 +0,0 @@
-register $JAR_PATH
-
-define DistinctBy datafu.pig.bags.DistinctBy('0');
-
-data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:INT, c:INT)});
-
-data2 = FOREACH data GENERATE DistinctBy(data);
-
-describe data2;
-
-STORE data2 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig
deleted file mode 100644
index 1647485..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateTest.pig
+++ /dev/null
@@ -1,16 +0,0 @@
-register $JAR_PATH
-
-define Enumerate datafu.pig.bags.Enumerate();
-
-data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})});
-
-data2 = FOREACH data GENERATE Enumerate(data);
-describe data2;
-
-data3 = FOREACH data2 GENERATE FLATTEN($0);
-describe data3;
-
-data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i;
-describe data4;
-
-STORE data4 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig
deleted file mode 100644
index 1f04b04..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithReverseTest.pig
+++ /dev/null
@@ -1,16 +0,0 @@
-register $JAR_PATH
-
-define Enumerate datafu.pig.bags.Enumerate('1', 'true');
-
-data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})});
-
-data2 = FOREACH data GENERATE Enumerate(data);
-describe data2;
-
-data3 = FOREACH data2 GENERATE FLATTEN($0);
-describe data3;
-
-data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i;
-describe data4;
-
-STORE data4 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig
deleted file mode 100644
index d288a6e..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/enumerateWithStartTest.pig
+++ /dev/null
@@ -1,16 +0,0 @@
-register $JAR_PATH
-
-define Enumerate datafu.pig.bags.Enumerate('1');
-
-data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})});
-
-data2 = FOREACH data GENERATE Enumerate(data);
-describe data2;
-
-data3 = FOREACH data2 GENERATE FLATTEN($0);
-describe data3;
-
-data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i;
-describe data4;
-
-STORE data4 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig
deleted file mode 100644
index 921787e..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/firstTupleFromBagTest.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define FirstTupleFromBag datafu.pig.bags.FirstTupleFromBag();
-
-data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)});
-
-data2 = FOREACH data GENERATE key, FirstTupleFromBag(B, null) as B;
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig
deleted file mode 100644
index 3e809b3..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/nullToEmptyBagTest.pig
+++ /dev/null
@@ -1,14 +0,0 @@
-register $JAR_PATH
-
-define NullToEmptyBag datafu.pig.bags.NullToEmptyBag();
-
-data = LOAD 'input' AS (B: bag {T: tuple(v:INT)});
-
-dump data;
-
-data2 = FOREACH data GENERATE NullToEmptyBag(B) as P;
-
-dump data2;
-
-STORE data2 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig
deleted file mode 100644
index c852346..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/prependToBagTest.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define PrependToBag datafu.pig.bags.PrependToBag();
-
-data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}, T: tuple(v:INT));
-
-data2 = FOREACH data GENERATE key, PrependToBag(B,T) as B;
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig
deleted file mode 100644
index 6f590e8..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setIntersectTest.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define SetIntersect datafu.pig.bags.sets.SetIntersect();
-
-data = LOAD 'input' AS (B1:bag{T:tuple(val1:int,val2:int)},B2:bag{T:tuple(val1:int,val2:int)});
-
-data2 = FOREACH data GENERATE SetIntersect(B1,B2);
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig
deleted file mode 100644
index a5e1c4d..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/sets/setUnionTest.pig
+++ /dev/null
@@ -1,13 +0,0 @@
-register $JAR_PATH
-
-define SetUnion datafu.pig.bags.sets.SetUnion();
-
-data = LOAD 'input' AS (B1:bag{T:tuple(val1:int,val2:int)},B2:bag{T:tuple(val1:int,val2:int)});
-
-dump data
-
-data2 = FOREACH data GENERATE SetUnion(B1,B2);
-
-dump data2
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig
deleted file mode 100644
index 1bf68bd..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests.pig
+++ /dev/null
@@ -1,16 +0,0 @@
-register $JAR_PATH
-
-define UnorderedPairs datafu.pig.bags.UnorderedPairs();
-
-data = LOAD 'input' AS (B: bag {T: tuple(v:INT)});
-
-data2 = FOREACH data GENERATE UnorderedPairs(B) as P;
-
-data3 = FOREACH data2 GENERATE FLATTEN(P);
-
-data4 = FOREACH data3 GENERATE FLATTEN(elem1), FLATTEN(elem2);
-
-data5 = ORDER data4 BY $0, $1;
-
-STORE data5 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig
deleted file mode 100644
index aada011..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/bags/unorderedPairsTests2.pig
+++ /dev/null
@@ -1,12 +0,0 @@
-register $JAR_PATH
-
-define UnorderedPairs datafu.pig.bags.UnorderedPairs();
-
-data = LOAD 'input' AS (A:int, B: bag {T: tuple(v:INT)});
-
-data2 = FOREACH data GENERATE A, UnorderedPairs(B) as P;
-
-data3 = FOREACH data2 GENERATE A, FLATTEN(P);
-
-STORE data3 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig
deleted file mode 100644
index 1e23a41..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/date/timeCountPageViewsTest.pig
+++ /dev/null
@@ -1,13 +0,0 @@
-register $JAR_PATH
-
-define TimeCount datafu.pig.date.TimeCount('$TIME_WINDOW');
-
-views = LOAD 'input' AS (user_id:int, page_id:int, time:chararray);
-
-views_grouped = GROUP views BY (user_id, page_id);
-view_counts = foreach views_grouped {
- views = order views by time;
- generate group.user_id as user_id, group.page_id as page_id, TimeCount(views.(time)) as count;
-}
-
-STORE view_counts INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig
deleted file mode 100644
index e52cc1f..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/geo/haversineTest.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define HaversineDistInMiles datafu.pig.geo.HaversineDistInMiles();
-
-data = LOAD 'input' AS (lat1:double,lng1:double,lat2:double,lng2:double);
-
-data2 = FOREACH data GENERATE HaversineDistInMiles(lat1,lng1,lat2,lng2);
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig
deleted file mode 100644
index 5a12c2e..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Base64Test.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define MD5 datafu.pig.hash.MD5Base64();
-
-data_in = LOAD 'input' as (val:chararray);
-
-data_out = FOREACH data_in GENERATE MD5(val) as val;
-
-STORE data_out INTO 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig
deleted file mode 100644
index 3fc6aaa..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/hash/md5Test.pig
+++ /dev/null
@@ -1,9 +0,0 @@
-register $JAR_PATH
-
-define MD5 datafu.pig.hash.MD5();
-
-data_in = LOAD 'input' as (val:chararray);
-
-data_out = FOREACH data_in GENERATE MD5(val) as val;
-
-STORE data_out INTO 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig
deleted file mode 100644
index a0e439c..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/linkanalysis/pageRankTest.pig
+++ /dev/null
@@ -1,25 +0,0 @@
-register $JAR_PATH
-
-/* Need to enable dangling node handling since the Wikipedia example has them,
- otherwise the ranks won't be right. */
-define PageRank datafu.pig.linkanalysis.PageRank('dangling_nodes','true');
-
-data = LOAD 'input' AS (topic:INT,source:INT,dest:INT,weight:DOUBLE);
-
-data_grouped = GROUP data by (topic,source);
-
-data_grouped = foreach data_grouped {
- generate group.topic as topic, group.source as source, data.(dest,weight) as edges;
-};
-
-data_grouped2 = GROUP data_grouped by topic;
-data_grouped2 = foreach data_grouped2 {
- generate group as topic, FLATTEN(PageRank(data_grouped.(source,edges))) as (source,rank);
-};
-
-data_grouped3 = FOREACH data_grouped2 GENERATE
- topic,
- source,
- rank;
-
-STORE data_grouped3 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig
deleted file mode 100644
index 3ca45c7..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/numbers/randomIntRangeTest.pig
+++ /dev/null
@@ -1,8 +0,0 @@
-register $JAR_PATH
-
-define RandInt datafu.pig.numbers.RandInt();
-
-data = LOAD 'input' AS (key:INT);
-data2 = FOREACH data GENERATE key, RandInt($MIN,$MAX) as val;
-
-STORE data2 INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig
deleted file mode 100644
index 6a4939e..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/sessions/sessionizeTest.pig
+++ /dev/null
@@ -1,17 +0,0 @@
-register $JAR_PATH
-
-define Sessionize datafu.pig.sessions.Sessionize('$TIME_WINDOW');
-
-views = LOAD 'input' AS (time:chararray, user_id:int, value:int);
-
-views_grouped = GROUP views BY user_id;
-view_counts = FOREACH views_grouped {
- views = ORDER views BY time;
- GENERATE flatten(Sessionize(views)) as (time,user_id,value,session_id);
-}
-
-max_value = GROUP view_counts BY (user_id, session_id);
-
-max_value = FOREACH max_value GENERATE group.user_id, MAX(view_counts.value) AS val;
-
-STORE max_value INTO 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig
deleted file mode 100644
index a121cb1..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairDefault.pig
+++ /dev/null
@@ -1,14 +0,0 @@
-register $JAR_PATH
-
-define markovPairs datafu.pig.stats.MarkovPairs();
-
-data = load 'input' as $schema;
-describe data;
-
-data_out1 = foreach data generate data as orig_bag;
-describe data_out1;
-
-data_out = foreach data_out1 generate markovPairs(orig_bag) as markov_bag;
-describe data_out;
-
-store data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig
deleted file mode 100644
index 269a1bc..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/markovPairLookahead.pig
+++ /dev/null
@@ -1,14 +0,0 @@
-register $JAR_PATH
-
-define markovPairs datafu.pig.stats.MarkovPairs('$lookahead');
-
-data = load 'input' as $schema;
-describe data;
-
-data_out1 = foreach data generate data as orig_bag;
-describe data_out1;
-
-data_out = foreach data_out1 generate markovPairs(orig_bag) as markov_bag;
-describe data_out;
-
-store data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig
deleted file mode 100644
index 0a439ce..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/medianTest.pig
+++ /dev/null
@@ -1,21 +0,0 @@
-register $JAR_PATH
-
-define Median datafu.pig.stats.Median();
-
-data_in = LOAD 'input' as (val:int);
-
-/*describe data_in;*/
-
-data_out = GROUP data_in ALL;
-
-/*describe data_out;*/
-
-data_out = FOREACH data_out {
- sorted = ORDER data_in BY val;
- GENERATE Median(sorted) as medians;
-}
-data_out = FOREACH data_out GENERATE FLATTEN(medians);
-
-/*describe data_out;*/
-
-STORE data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig
deleted file mode 100644
index 604d179..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/quantileTest.pig
+++ /dev/null
@@ -1,21 +0,0 @@
-register $JAR_PATH
-
-define Quantile datafu.pig.stats.Quantile($QUANTILES);
-
-data_in = LOAD 'input' as (val:int);
-
-/*describe data_in;*/
-
-data_out = GROUP data_in ALL;
-
-/*describe data_out;*/
-
-data_out = FOREACH data_out {
- sorted = ORDER data_in BY val;
- GENERATE Quantile(sorted) as quantiles;
-}
-data_out = FOREACH data_out GENERATE FLATTEN(quantiles);
-
-/*describe data_out;*/
-
-STORE data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig
deleted file mode 100644
index 27d64f3..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingMedianTest.pig
+++ /dev/null
@@ -1,21 +0,0 @@
-register $JAR_PATH
-
-define Median datafu.pig.stats.StreamingMedian();
-
-data_in = LOAD 'input' as (val:int);
-
-/*describe data_in;*/
-
-data_out = GROUP data_in ALL;
-
-/*describe data_out;*/
-
-data_out = FOREACH data_out {
- sorted = ORDER data_in BY val;
- GENERATE Median(sorted) as medians;
-}
-data_out = FOREACH data_out GENERATE FLATTEN(medians);
-
-/*describe data_out;*/
-
-STORE data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig
deleted file mode 100644
index 51c3bc5..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/streamingQuantileTest.pig
+++ /dev/null
@@ -1,18 +0,0 @@
-register $JAR_PATH
-
-define Quantile datafu.pig.stats.StreamingQuantile($QUANTILES);
-
-data_in = LOAD 'input' as (val:int);
-
-/*describe data_in;*/
-
-data_out = GROUP data_in ALL;
-
-/*describe data_out;*/
-
-data_out = FOREACH data_out GENERATE Quantile(data_in.val) as quantiles;
-data_out = FOREACH data_out GENERATE FLATTEN(quantiles);
-
-/*describe data_out;*/
-
-STORE data_out into 'output';
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig
deleted file mode 100644
index 19fa466..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/stats/wilsonBinConfTests.pig
+++ /dev/null
@@ -1,11 +0,0 @@
-register $JAR_PATH
-
-define WilsonBinConf datafu.pig.stats.WilsonBinConf('$alpha');
-
-data = load 'input' as (successes:long, totals:long);
-describe data;
-
-data_out = FOREACH data GENERATE WilsonBinConf(successes, totals) as interval;
-data_out = FOREACH data_out GENERATE FLATTEN(interval);
-
-store data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig
deleted file mode 100644
index 4548755..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/urls/userAgentTest.pig
+++ /dev/null
@@ -1,8 +0,0 @@
-register $JAR_PATH
-
-define UserAgentClassify datafu.pig.urls.UserAgentClassify();
-
-data = load 'input' as (usr_agent:chararray);
-data_out = foreach data generate UserAgentClassify(usr_agent) as class;
-describe data_out;
-store data_out into 'output';
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig
deleted file mode 100644
index f240987..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithMessageTest.pig
+++ /dev/null
@@ -1,10 +0,0 @@
-register $JAR_PATH
-
-define ASSERT datafu.pig.util.ASSERT();
-
-data = LOAD 'input' AS (val:INT);
-
-data2 = FILTER data BY ASSERT(val,'assertion appears to have failed, doh!');
-
-STORE data2 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig
deleted file mode 100644
index c6368e7..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/assertWithoutMessageTest.pig
+++ /dev/null
@@ -1,10 +0,0 @@
-register $JAR_PATH
-
-define ASSERT datafu.pig.util.ASSERT();
-
-data = LOAD 'input' AS (val:INT);
-
-data2 = FILTER data BY ASSERT(val);
-
-STORE data2 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig
deleted file mode 100644
index 18cda42..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolTest.pig
+++ /dev/null
@@ -1,10 +0,0 @@
-register $JAR_PATH
-
-define IntToBool datafu.pig.util.IntToBool();
-
-data = LOAD 'input' AS (val:INT);
-
-data2 = FOREACH data GENERATE IntToBool(val);
-
-STORE data2 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig b/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig
deleted file mode 100644
index 82d3ee0..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/resources/datafu/util/intToBoolToIntTest.pig
+++ /dev/null
@@ -1,12 +0,0 @@
-register $JAR_PATH
-
-define IntToBool datafu.pig.util.IntToBool();
-define BoolToInt datafu.pig.util.BoolToInt();
-
-data = LOAD 'input' AS (val:INT);
-
-data2 = FOREACH data GENERATE IntToBool(val) as val;
-data3 = FOREACH data2 GENERATE BoolToInt(val) as val;
-
-STORE data3 INTO 'output';
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml b/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml
index 19365e8..d9afcb3 100644
--- a/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml
+++ b/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml
@@ -446,25 +446,6 @@ easy to test, and efficient to run.</description>
<hive-hcatalog>/self</hive-hcatalog>
</deps>
</hive-hcatalog-server>
- <pig-udf-datafu>
- <metadata>
- <summary>A collection of user-defined functions for Hadoop and Pig.</summary>
- <description> DataFu is a collection of user-defined functions for working with large-scale
- data in Hadoop and Pig. This library was born out of the need for a stable,
- well-tested library of UDFs for data mining and statistics. It is used
- at LinkedIn in many of our off-line workflows for data derived products like
- "People You May Know" and "Skills".
-
- It contains functions for: PageRank, Quantiles (median), variance, Sessionization,
- Convenience bag functions (e.g., set operations, enumerating bags, etc),
- Convenience utility functions (e.g., assertions, easier writing of EvalFuncs)
- and more...</description>
- <url>https://github.com/linkedin/datafu</url>
- </metadata>
- <deps>
- <pig/>
- </deps>
- </pig-udf-datafu>
<hive-jdbc>
<metadata>
<summary>Provides libraries necessary to connect to Apache Hive via JDBC</summary>
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/pom.xml
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/pom.xml b/bigtop-tests/test-artifacts/pom.xml
index f8dea78..b0af51e 100644
--- a/bigtop-tests/test-artifacts/pom.xml
+++ b/bigtop-tests/test-artifacts/pom.xml
@@ -45,7 +45,6 @@
<module>hue</module>
<module>solr</module>
<module>crunch</module>
- <module>datafu</module>
<module>longevity</module>
<module>hcatalog</module>
<module>spark</module>
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-execution/smokes/datafu/pom.xml
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-execution/smokes/datafu/pom.xml b/bigtop-tests/test-execution/smokes/datafu/pom.xml
deleted file mode 100644
index 45b007f..0000000
--- a/bigtop-tests/test-execution/smokes/datafu/pom.xml
+++ /dev/null
@@ -1,140 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.bigtop.itest</groupId>
- <artifactId>smoke-tests</artifactId>
- <version>1.3.1-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
-
- <groupId>org.apache.bigtop.itest</groupId>
- <artifactId>datafu-smoke-execution</artifactId>
- <version>1.3.1-SNAPSHOT</version>
- <name>datafu smoke test execution</name>
-
- <properties>
- <org.apache.maven-dependency-plugin.groupId>org.apache.bigtop.itest</org.apache.maven-dependency-plugin.groupId>
- <org.apache.maven-dependency-plugin.artifactId>datafu-smoke</org.apache.maven-dependency-plugin.artifactId>
- <org.apache.maven-dependency-plugin.version>${project.version}</org.apache.maven-dependency-plugin.version>
- <org.apache.maven-dependency-plugin.output>${project.build.directory}</org.apache.maven-dependency-plugin.output>
- <org.apache.maven-dependency-plugin.type>jar</org.apache.maven-dependency-plugin.type>
- <org.apache.maven-failsafe-plugin.testInclude>**/*Tests*</org.apache.maven-failsafe-plugin.testInclude>
-
- <HADOOP_MAPRED_HOME>${env.HADOOP_MAPRED_HOME}</HADOOP_MAPRED_HOME>
- <HADOOP_CONF_DIR>${env.HADOOP_CONF_DIR}</HADOOP_CONF_DIR>
- <PIG_HOME>${env.PIG_HOME}</PIG_HOME>
- </properties>
-
- <dependencies>
- <dependency>
- <groupId>${org.apache.maven-dependency-plugin.groupId}</groupId>
- <artifactId>${org.apache.maven-dependency-plugin.artifactId}</artifactId>
- <version>${project.version}</version>
- </dependency>
- </dependencies>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-install-plugin</artifactId>
- <executions>
- <execution>
- <phase>initialize</phase>
- <goals>
- <goal>install-file</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <file>${PIG_HOME}/pig.jar</file>
- <groupId>org.apache.pig</groupId>
- <artifactId>pig</artifactId>
- <version>${pig.version}</version>
- <packaging>jar</packaging>
- </configuration>
- </plugin>
-
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-dependency-plugin</artifactId>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-enforcer-plugin</artifactId>
- <version>1.0</version>
- <executions>
- <execution>
- <id>enforce-property</id>
- <goals>
- <goal>enforce</goal>
- </goals>
- <configuration>
- <rules>
- <requireProperty>
- <property>HADOOP_MAPRED_HOME</property>
- <message>HADOOP_MAPRED_HOME env. variable has to be set</message>
- </requireProperty>
- <requireProperty>
- <property>HADOOP_CONF_DIR</property>
- <message>HADOOP_CONF_DIR env. variable has to be set</message>
- </requireProperty>
- <requireProperty>
- <property>PIG_HOME</property>
- <message>PIG_HOME env. variable has to be set</message>
- </requireProperty>
- </rules>
- <fail>true</fail>
- </configuration>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-failsafe-plugin</artifactId>
- <version>2.11</version>
- <configuration>
- <forkMode>always</forkMode>
- <argLine>-Dpigunit.exectype.cluster=true</argLine>
- <additionalClasspathElements>
- <additionalClasspathElement>${HADOOP_CONF_DIR}</additionalClasspathElement>
- </additionalClasspathElements>
- <systemPropertyVariables>
- <datafu.jar.dir>${PIG_HOME}</datafu.jar.dir>
- </systemPropertyVariables>
- </configuration>
-
- <!-- Disabling for now: configuration>
- <testSourceDirectory>src</testSourceDirectory>
- <testClassesDirectory>target/classes</testClassesDirectory>
- <skipTests>false</skipTests>
- <testFailureIgnore>false</testFailureIgnore>
- <argLine>-Dsun.lang.ClassLoader.allowArraySyntax=true -Djava.endorsed.dirs=${project.build.testOutputDirectory}/endorsed</argLine>
- </configuration -->
- </plugin>
- </plugins>
- </build>
-</project>
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop.bom
----------------------------------------------------------------------
diff --git a/bigtop.bom b/bigtop.bom
index 02e5b96..3664f7f 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -252,17 +252,6 @@ bigtop {
site = "${apache.APACHE_MIRROR}/${download_path}"
archive = "${apache.APACHE_ARCHIVE}/${download_path}" }
}
- 'datafu' {
- name = 'datafu'
- pkg = 'pig-udf-datafu'
- relNotes = 'Pig UDF Datafu'
- version { base = '1.3.0'; pkg = base; release = 1 }
- tarball { destination = "$name-${version.base}.tar.gz"
- source = "apache-$name-incubating-sources-${version.base}.tgz" }
- url { download_path = "incubator/$name/apache-$name-incubating-${version.base}"
- site = "${apache.APACHE_MIRROR}/${download_path}"
- archive = "${apache.APACHE_ARCHIVE}/${download_path}" }
- }
'solr' {
name = 'solr'
relNotes = 'Apache Solr'
[2/2] bigtop git commit: BIGTOP-3086: Drop datafu packaging
Posted by ju...@apache.org.
BIGTOP-3086: Drop datafu packaging
Since pig has been removed with BIGTOP-3075,
we should remove datafu as well, since datafu-pig depends on pig.
Change-Id: Ide8d8cb5e8223cf6307a48f7e31a606a02dcefdb
Signed-off-by: Yuqi Gu <yu...@arm.com>
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/4cee56bd
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/4cee56bd
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/4cee56bd
Branch: refs/heads/master
Commit: 4cee56bdc831b254805a121d43c33fc6a3a9c53e
Parents: aaffc1e
Author: Yuqi Gu <yu...@arm.com>
Authored: Mon Oct 8 06:11:23 2018 +0000
Committer: Jun He <ju...@apache.org>
Committed: Mon Dec 10 05:17:39 2018 +0000
----------------------------------------------------------------------
bigtop-ci/jenkins/jobsCreator.groovy | 4 +-
.../src/common/datafu/do-component-build | 22 -
.../src/common/datafu/install_datafu.sh | 80 ----
bigtop-packages/src/deb/datafu/changelog | 1 -
bigtop-packages/src/deb/datafu/compat | 1 -
bigtop-packages/src/deb/datafu/control | 38 --
bigtop-packages/src/deb/datafu/copyright | 15 -
.../src/deb/datafu/pig-udf-datafu.install | 1 -
bigtop-packages/src/deb/datafu/rules | 36 --
bigtop-packages/src/deb/datafu/source/format | 1 -
bigtop-packages/src/rpm/datafu/BUILD/.gitignore | 0
bigtop-packages/src/rpm/datafu/RPMS/.gitignore | 0
.../src/rpm/datafu/SOURCES/.gitignore | 0
.../src/rpm/datafu/SPECS/datafu.spec | 72 ---
bigtop-packages/src/rpm/datafu/SRPMS/.gitignore | 0
bigtop-tests/test-artifacts/datafu/pom.xml | 68 ---
.../java/datafu/linkanalysis/PageRank.java | 441 -------------------
.../java/datafu/pig/linkanalysis/PageRank.java | 372 ----------------
.../apache/bigtop/itest/datafu/PigTests.java | 211 ---------
.../bigtop/itest/datafu/bags/BagTests.java | 308 -------------
.../bigtop/itest/datafu/bags/sets/SetTests.java | 74 ----
.../bigtop/itest/datafu/date/TimeTests.java | 65 ---
.../bigtop/itest/datafu/geo/GeoTests.java | 75 ----
.../bigtop/itest/datafu/hash/HashTests.java | 63 ---
.../itest/datafu/linkanalysis/PageRankTest.java | 299 -------------
.../datafu/linkanalysis/PageRankTests.java | 120 -----
.../itest/datafu/numbers/NumberTests.java | 65 ---
.../itest/datafu/sessions/SessionTests.java | 92 ----
.../itest/datafu/stats/MarkovPairTests.java | 105 -----
.../itest/datafu/stats/QuantileTests.java | 196 ---------
.../itest/datafu/stats/WilsonBinConfTests.java | 81 ----
.../bigtop/itest/datafu/urls/UserAgentTest.java | 57 ---
.../bigtop/itest/datafu/util/AssertTests.java | 93 ----
.../datafu/util/IntBoolConversionPigTests.java | 77 ----
.../datafu/bags/aliasBagFieldsTest.pig | 20 -
.../resources/datafu/bags/appendToBagTest.pig | 9 -
.../resources/datafu/bags/bagConcatTest.pig | 11 -
.../main/resources/datafu/bags/bagSplitTest.pig | 14 -
.../datafu/bags/bagSplitWithBagNumTest.pig | 11 -
.../bags/comprehensiveBagSplitAndEnumerate.pig | 26 --
.../resources/datafu/bags/distinctByTest.pig | 12 -
.../resources/datafu/bags/enumerateTest.pig | 16 -
.../datafu/bags/enumerateWithReverseTest.pig | 16 -
.../datafu/bags/enumerateWithStartTest.pig | 16 -
.../datafu/bags/firstTupleFromBagTest.pig | 9 -
.../datafu/bags/nullToEmptyBagTest.pig | 14 -
.../resources/datafu/bags/prependToBagTest.pig | 9 -
.../datafu/bags/sets/setIntersectTest.pig | 9 -
.../resources/datafu/bags/sets/setUnionTest.pig | 13 -
.../datafu/bags/unorderedPairsTests.pig | 16 -
.../datafu/bags/unorderedPairsTests2.pig | 12 -
.../datafu/date/timeCountPageViewsTest.pig | 13 -
.../main/resources/datafu/geo/haversineTest.pig | 9 -
.../resources/datafu/hash/md5Base64Test.pig | 9 -
.../src/main/resources/datafu/hash/md5Test.pig | 9 -
.../datafu/linkanalysis/pageRankTest.pig | 25 --
.../datafu/numbers/randomIntRangeTest.pig | 8 -
.../datafu/sessions/sessionizeTest.pig | 17 -
.../datafu/stats/markovPairDefault.pig | 14 -
.../datafu/stats/markovPairLookahead.pig | 14 -
.../main/resources/datafu/stats/medianTest.pig | 21 -
.../resources/datafu/stats/quantileTest.pig | 21 -
.../datafu/stats/streamingMedianTest.pig | 21 -
.../datafu/stats/streamingQuantileTest.pig | 18 -
.../datafu/stats/wilsonBinConfTests.pig | 11 -
.../resources/datafu/urls/userAgentTest.pig | 8 -
.../datafu/util/assertWithMessageTest.pig | 10 -
.../datafu/util/assertWithoutMessageTest.pig | 10 -
.../resources/datafu/util/intToBoolTest.pig | 10 -
.../datafu/util/intToBoolToIntTest.pig | 12 -
.../package/src/main/resources/package_data.xml | 19 -
bigtop-tests/test-artifacts/pom.xml | 1 -
.../test-execution/smokes/datafu/pom.xml | 140 ------
bigtop.bom | 11 -
74 files changed, 2 insertions(+), 3795 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-ci/jenkins/jobsCreator.groovy
----------------------------------------------------------------------
diff --git a/bigtop-ci/jenkins/jobsCreator.groovy b/bigtop-ci/jenkins/jobsCreator.groovy
index 5ced028..3d5240d 100644
--- a/bigtop-ci/jenkins/jobsCreator.groovy
+++ b/bigtop-ci/jenkins/jobsCreator.groovy
@@ -16,8 +16,8 @@
*/
// FIXME: it would be nice to extract the following from bigtop.mk on the fly
-def bigtopComponents = ["bigtop-groovy", "bigtop-jsvc", "bigtop-tomcat", "bigtop-utils",
- "zookeeper", "hadoop", "hbase", "hive", "pig", "crunch", "datafu",
+def bigtopComponents = ["bigtop-groovy", "bigtop-jsvc", "bigtop-tomcat", "bigtop-utils",
+ "zookeeper", "hadoop", "hbase", "hive", "pig", "crunch",
"flume", "giraph", "ignite-hadoop", "mahout", "oozie", "phoenix",
"solr", "spark", "sqoop", "alluxio", "whirr"]
// FIXME: it would be nice to extract the following from some static configuration file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/common/datafu/do-component-build
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/datafu/do-component-build b/bigtop-packages/src/common/datafu/do-component-build
deleted file mode 100644
index e8ea9a7..0000000
--- a/bigtop-packages/src/common/datafu/do-component-build
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -ex
-
-. `dirname $0`/bigtop.bom
-
-gradle -b bootstrap.gradle
-./gradlew clean assemble
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/common/datafu/install_datafu.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/datafu/install_datafu.sh b/bigtop-packages/src/common/datafu/install_datafu.sh
deleted file mode 100755
index df65c9e..0000000
--- a/bigtop-packages/src/common/datafu/install_datafu.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-usage() {
- echo "
-usage: $0 <options>
- Required not-so-options:
- --build-dir=DIR path to pig dist.dir
- --prefix=PREFIX path to install into
-
- Optional options:
- --lib-dir=DIR path to install pig home [/usr/lib/pig]
- --build-dir=DIR path to pig dist dir
- ... [ see source for more similar options ]
- "
- exit 1
-}
-
-OPTS=$(getopt \
- -n $0 \
- -o '' \
- -l 'prefix:' \
- -l 'lib-dir:' \
- -l 'build-dir:' -- "$@")
-
-if [ $? != 0 ] ; then
- usage
-fi
-
-eval set -- "$OPTS"
-while true ; do
- case "$1" in
- --prefix)
- PREFIX=$2 ; shift 2
- ;;
- --build-dir)
- BUILD_DIR=$2 ; shift 2
- ;;
- --lib-dir)
- LIB_DIR=$2 ; shift 2
- ;;
- --)
- shift ; break
- ;;
- *)
- echo "Unknown option: $1"
- usage
- exit 1
- ;;
- esac
-done
-
-for var in PREFIX BUILD_DIR ; do
- if [ -z "$(eval "echo \$$var")" ]; then
- echo Missing param: $var
- usage
- fi
-done
-
-LIB_DIR=${LIB_DIR:-/usr/lib/pig}
-
-# First we'll move everything into lib
-install -d -m 0755 $PREFIX/$LIB_DIR
-cp $BUILD_DIR/datafu-*.jar $PREFIX/$LIB_DIR
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/changelog
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/changelog b/bigtop-packages/src/deb/datafu/changelog
deleted file mode 100644
index 547ed02..0000000
--- a/bigtop-packages/src/deb/datafu/changelog
+++ /dev/null
@@ -1 +0,0 @@
---- This is auto-generated
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/compat
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/compat b/bigtop-packages/src/deb/datafu/compat
deleted file mode 100644
index 7f8f011..0000000
--- a/bigtop-packages/src/deb/datafu/compat
+++ /dev/null
@@ -1 +0,0 @@
-7
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/control
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/control b/bigtop-packages/src/deb/datafu/control
deleted file mode 100644
index 04a5a65..0000000
--- a/bigtop-packages/src/deb/datafu/control
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-Source: pig-udf-datafu
-Section: misc
-Priority: extra
-Maintainer: Bigtop <de...@bigtop.apache.org>
-Build-Depends: debhelper (>= 7.0.50~)
-Standards-Version: 3.8.0
-Homepage: https://github.com/linkedin/datafu
-
-Package: pig-udf-datafu
-Architecture: all
-Depends: pig
-Description: A collection of user-defined functions for Hadoop and Pig.
- DataFu is a collection of user-defined functions for working with large-scale
- data in Hadoop and Pig. This library was born out of the need for a stable,
- well-tested library of UDFs for data mining and statistics. It is used
- at LinkedIn in many of our off-line workflows for data derived products like
- "People You May Know" and "Skills".
- .
- It contains functions for: PageRank, Quantiles (median), variance, Sessionization,
- Convenience bag functions (e.g., set operations, enumerating bags, etc),
- Convenience utility functions (e.g., assertions, easier writing of EvalFuncs)
- and more...
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/copyright
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/copyright b/bigtop-packages/src/deb/datafu/copyright
deleted file mode 100644
index 422ec82..0000000
--- a/bigtop-packages/src/deb/datafu/copyright
+++ /dev/null
@@ -1,15 +0,0 @@
-Format: http://dep.debian.net/deps/dep5
-Source: https://github.com/linkedin/datafu
-Upstream-Name: DataFu
-
-Files: *
-Copyright: 2010, LinkedIn, Inc
-License: Apache-2.0
-
-Files debian/*
-Copyright: 2011, The Apache Software Foundation
-License: Apache-2.0
-
-License: Apache-2.0
- On Debian systems, the complete text of the Apache 2.0 license
- can be found in "/usr/share/common-licenses/Apache-2.0".
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/pig-udf-datafu.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/pig-udf-datafu.install b/bigtop-packages/src/deb/datafu/pig-udf-datafu.install
deleted file mode 100644
index 6a9697b..0000000
--- a/bigtop-packages/src/deb/datafu/pig-udf-datafu.install
+++ /dev/null
@@ -1 +0,0 @@
-/usr/lib/pig
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/rules
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/rules b/bigtop-packages/src/deb/datafu/rules
deleted file mode 100755
index 24a5f38..0000000
--- a/bigtop-packages/src/deb/datafu/rules
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/make -f
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# -*- makefile -*-
-
-# Uncomment this to turn on verbose mode.
-export DH_VERBOSE=1
-
-# This has to be exported to make some magic below work.
-export DH_OPTIONS
-
-%:
- dh $@
-
-override_dh_auto_build:
- # we'll just use the build from the tarball.
- bash debian/do-component-build -Divy.home=${HOME}/.ivy2
-
-override_dh_auto_install:
- sh -x debian/install_datafu.sh \
- --build-dir=datafu-pig/build/libs \
- --prefix=debian/tmp
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/deb/datafu/source/format
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/datafu/source/format b/bigtop-packages/src/deb/datafu/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/bigtop-packages/src/deb/datafu/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/BUILD/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/BUILD/.gitignore b/bigtop-packages/src/rpm/datafu/BUILD/.gitignore
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/RPMS/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/RPMS/.gitignore b/bigtop-packages/src/rpm/datafu/RPMS/.gitignore
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/SOURCES/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/SOURCES/.gitignore b/bigtop-packages/src/rpm/datafu/SOURCES/.gitignore
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec b/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec
deleted file mode 100644
index 0185736..0000000
--- a/bigtop-packages/src/rpm/datafu/SPECS/datafu.spec
+++ /dev/null
@@ -1,72 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-%define datafu_name datafu
-%define lib_datafu /usr/lib/pig
-
-%if %{?suse_version:1}0
-%define doc_datafu %{_docdir}/datafu-doc
-%else
-%define doc_datafu %{_docdir}/datafu-doc-%{datafu_version}
-%endif
-
-# disable repacking jars
-%define __os_install_post %{nil}
-
-Name: pig-udf-datafu
-Version: %{datafu_version}
-Release: %{datafu_release}
-Summary: A collection of user-defined functions for Hadoop and Pig.
-URL: https://github.com/linkedin/datafu
-Group: Development/Libraries
-BuildArch: noarch
-Buildroot: %(mktemp -ud %{_tmppath}/%{datafu_name}-%{version}-%{release}-XXXXXX)
-License: ASL 2.0
-Source0: %{datafu_name}-%{datafu_base_version}.tar.gz
-Source1: do-component-build
-Source2: install_%{datafu_name}.sh
-Requires: hadoop-client, bigtop-utils >= 0.7
-
-
-%description
-DataFu is a collection of user-defined functions for working with large-scale
-data in Hadoop and Pig. This library was born out of the need for a stable,
-well-tested library of UDFs for data mining and statistics. It is used
-at LinkedIn in many of our off-line workflows for data derived products like
-"People You May Know" and "Skills".
-
-It contains functions for: PageRank, Quantiles (median), variance, Sessionization,
-Convenience bag functions (e.g., set operations, enumerating bags, etc),
-Convenience utility functions (e.g., assertions, easier writing of EvalFuncs)
-and more...
-
-%prep
-%setup -n apache-%{datafu_name}-incubating-sources-%{datafu_base_version}
-
-%build
-bash $RPM_SOURCE_DIR/do-component-build
-
-%install
-%__rm -rf $RPM_BUILD_ROOT
-sh $RPM_SOURCE_DIR/install_datafu.sh \
- --build-dir=datafu-pig/build/libs \
- --prefix=$RPM_BUILD_ROOT
-
-#######################
-#### FILES SECTION ####
-#######################
-%files
-%defattr(-,root,root,755)
-%{lib_datafu}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-packages/src/rpm/datafu/SRPMS/.gitignore
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/datafu/SRPMS/.gitignore b/bigtop-packages/src/rpm/datafu/SRPMS/.gitignore
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/pom.xml
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/pom.xml b/bigtop-tests/test-artifacts/datafu/pom.xml
deleted file mode 100644
index f0ed55b..0000000
--- a/bigtop-tests/test-artifacts/datafu/pom.xml
+++ /dev/null
@@ -1,68 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
- <parent>
- <groupId>org.apache.bigtop.itest</groupId>
- <artifactId>bigtop-smokes</artifactId>
- <version>1.3.1-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
-
- <modelVersion>4.0.0</modelVersion>
- <groupId>org.apache.bigtop.itest</groupId>
- <artifactId>datafu-smoke</artifactId>
- <version>1.3.1-SNAPSHOT</version>
- <name>datafusmoke</name>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.pig</groupId>
- <artifactId>pig</artifactId>
- <version>0.11.1</version>
- </dependency>
- <dependency>
- <groupId>org.apache.pig</groupId>
- <artifactId>pigunit</artifactId>
- <version>0.11.1</version>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- </dependency>
- <dependency>
- <groupId>joda-time</groupId>
- <artifactId>joda-time</artifactId>
- <version>1.6</version>
- </dependency>
- <dependency>
- <groupId>it.unimi.dsi</groupId>
- <artifactId>fastutil</artifactId>
- <version>6.3</version>
- </dependency>
- </dependencies>
-
- <build>
- </build>
-
-</project>
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
deleted file mode 100644
index 2cadcf9..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/linkanalysis/PageRank.java
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- * Copyright 2010 LinkedIn, Inc
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package datafu.linkanalysis;
-
-import it.unimi.dsi.fastutil.floats.FloatArrayList;
-import it.unimi.dsi.fastutil.ints.Int2IntMap;
-import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Map;
-
-import com.google.common.collect.AbstractIterator;
-
-/**
- * An implementation of {@link <a href="http://en.wikipedia.org/wiki/PageRank" target="_blank">PageRank</a>}.
- * This implementation is not distributed. It is intended for graphs of a reasonable size which can be processed
- * on a single machine. Nodes are stored in memory. Edges are stored in memory and can optionally be spilled to
- * disk once a certain limit is reached.
- */
-public class PageRank
-{
- private float totalRankChange;
- private long edgeCount;
- private long nodeCount;
-
- // the damping factor
- private static float ALPHA = 0.85f;
-
- // edge weights (which are doubles) are multiplied by this value so they can be stored as integers internally
- private static float EDGE_WEIGHT_MULTIPLIER = 100000;
-
- private final Int2IntOpenHashMap nodeIndices = new Int2IntOpenHashMap();
- private final FloatArrayList nodeData = new FloatArrayList(); // rank, total weight, contribution, (repeat)
-
- private final IntArrayList danglingNodes = new IntArrayList();
-
- private final IntArrayList edges = new IntArrayList(); // source, dest node count... dest id, weight pos, (repeat)
-
- private boolean shouldHandleDanglingNodes = false;
- private boolean shouldCacheEdgesOnDisk = false;
- private long edgeCachingThreshold;
-
- private File edgesFile;
- private DataOutputStream edgeDataOutputStream;
- private boolean usingEdgeDiskCache;
-
- public interface ProgressIndicator
- {
- void progress();
- }
-
- public void clear() throws IOException
- {
- this.edgeCount = 0;
- this.nodeCount = 0;
- this.totalRankChange = 0.0f;
-
- this.nodeIndices.clear();
- this.nodeData.clear();
- this.edges.clear();
- this.danglingNodes.clear();
-
- if (edgeDataOutputStream != null)
- {
- this.edgeDataOutputStream.close();
- this.edgeDataOutputStream = null;
- }
-
- this.usingEdgeDiskCache = false;
- this.edgesFile = null;
- }
-
- /**
- * Gets whether disk is being used to cache edges.
- * @return True if the edges are cached on disk.
- */
- public boolean isUsingEdgeDiskCache()
- {
- return usingEdgeDiskCache;
- }
-
- /**
- * Enable disk caching of edges once there are too many (disabled by default).
- */
- public void enableEdgeDiskCaching()
- {
- shouldCacheEdgesOnDisk = true;
- }
-
- /**
- * Disable disk caching of edges once there are too many (disabled by default).
- */
- public void disableEdgeDiskCaching()
- {
- shouldCacheEdgesOnDisk = false;
- }
-
- /**
- * Gets whether edge disk caching is enabled.
- * @return True if edge disk caching is enabled.
- */
- public boolean isEdgeDiskCachingEnabled()
- {
- return shouldCacheEdgesOnDisk;
- }
-
- /**
- * Gets the number of edges past which they will be cached on disk instead of in memory.
- * Edge disk caching must be enabled for this to have any effect.
- * @return Edge count past which caching occurs
- */
- public long getEdgeCachingThreshold()
- {
- return edgeCachingThreshold;
- }
-
- /**
- * Set the number of edges past which they will be cached on disk instead of in memory.
- * Edge disk caching must be enabled for this to have any effect.
- * @param count Edge count past which caching occurs
- */
- public void setEdgeCachingThreshold(long count)
- {
- edgeCachingThreshold = count;
- }
-
- /**
- * Enables dangling node handling (disabled by default).
- */
- public void enableDanglingNodeHandling()
- {
- shouldHandleDanglingNodes = true;
- }
-
- /**
- * Disables dangling node handling (disabled by default).
- */
- public void disableDanglingNodeHandling()
- {
- shouldHandleDanglingNodes = false;
- }
-
- public long nodeCount()
- {
- return this.nodeCount;
- }
-
- public long edgeCount()
- {
- return this.edgeCount;
- }
-
- public Int2IntMap.FastEntrySet getNodeIds()
- {
- return this.nodeIndices.int2IntEntrySet();
- }
-
- public float getNodeRank(int nodeId)
- {
- int nodeIndex = this.nodeIndices.get(nodeId);
- return nodeData.get(nodeIndex);
- }
-
- public float getTotalRankChange()
- {
- return this.totalRankChange;
- }
-
- private void maybeCreateNode(int nodeId)
- {
- // create from node if it doesn't already exist
- if (!nodeIndices.containsKey(nodeId))
- {
- int index = this.nodeData.size();
-
- this.nodeData.add(0.0f); // rank
- this.nodeData.add(0.0f); // total weight
- this.nodeData.add(0.0f); // contribution
-
- this.nodeIndices.put(nodeId, index);
-
- this.nodeCount++;
- }
- }
-
- public void addEdges(Integer sourceId, ArrayList<Map<String,Object>> sourceEdges) throws IOException
- {
- int source = sourceId.intValue();
-
- maybeCreateNode(source);
-
- if (this.shouldCacheEdgesOnDisk && !usingEdgeDiskCache && (sourceEdges.size() + this.edgeCount) >= this.edgeCachingThreshold)
- {
- writeEdgesToDisk();
- }
-
- // store the source node id itself
- appendEdgeData(source);
-
- // store how many outgoing edges this node has
- appendEdgeData(sourceEdges.size());
-
- // store the outgoing edges
- for (Map<String,Object> edge : sourceEdges)
- {
- int dest = ((Integer)edge.get("dest")).intValue();
- float weight = ((Double)edge.get("weight")).floatValue();
-
- maybeCreateNode(dest);
-
- appendEdgeData(dest);
-
- // location of weight in weights array
- appendEdgeData(Math.max(1, (int)(weight * EDGE_WEIGHT_MULTIPLIER)));
-
- this.edgeCount++;
- }
- }
-
- private void appendEdgeData(int data) throws IOException
- {
- if (this.edgeDataOutputStream != null)
- {
- this.edgeDataOutputStream.writeInt(data);
- }
- else
- {
- this.edges.add(data);
- }
- }
-
- public void init(ProgressIndicator progressIndicator) throws IOException
- {
- if (this.edgeDataOutputStream != null)
- {
- this.edgeDataOutputStream.close();
- this.edgeDataOutputStream = null;
- }
-
- // initialize all nodes to an equal share of the total rank (1.0)
- float nodeRank = 1.0f / this.nodeCount;
- for (int j=0; j<this.nodeData.size(); j+=3)
- {
- nodeData.set(j, nodeRank);
- progressIndicator.progress();
- }
-
- Iterator<Integer> edgeData = getEdgeData();
-
- while(edgeData.hasNext())
- {
- int sourceId = edgeData.next();
- int nodeEdgeCount = edgeData.next();
-
- while (nodeEdgeCount-- > 0)
- {
- // skip the destination node id
- edgeData.next();
-
- float weight = edgeData.next();
-
- int nodeIndex = this.nodeIndices.get(sourceId);
-
- float totalWeight = this.nodeData.getFloat(nodeIndex+1);
- totalWeight += weight;
- this.nodeData.set(nodeIndex+1, totalWeight);
-
- progressIndicator.progress();
- }
- }
-
- // if handling dangling nodes, get a list of them by finding those nodes with no outgoing
- // edges (i.e. total outgoing edge weight is 0.0)
- if (shouldHandleDanglingNodes)
- {
- for (Map.Entry<Integer,Integer> e : nodeIndices.entrySet())
- {
- int nodeId = e.getKey();
- int nodeIndex = e.getValue();
- float totalWeight = nodeData.getFloat(nodeIndex+1);
- if (totalWeight == 0.0f)
- {
- danglingNodes.add(nodeId);
- }
- }
- }
- }
-
- public float nextIteration(ProgressIndicator progressIndicator) throws IOException
- {
- distribute(progressIndicator);
- commit(progressIndicator);
-
- return getTotalRankChange();
- }
-
- public void distribute(ProgressIndicator progressIndicator) throws IOException
- {
- Iterator<Integer> edgeData = getEdgeData();
-
- while(edgeData.hasNext())
- {
- int sourceId = edgeData.next();
- int nodeEdgeCount = edgeData.next();
-
- while (nodeEdgeCount-- > 0)
- {
- int toId = edgeData.next();
- float weight = edgeData.next();
-
- int fromNodeIndex = this.nodeIndices.get(sourceId);
- int toNodeIndex = this.nodeIndices.get(toId);
-
- float contributionChange = weight * this.nodeData.getFloat(fromNodeIndex) / this.nodeData.getFloat(fromNodeIndex+1);
-
- float currentContribution = this.nodeData.getFloat(toNodeIndex+2);
- this.nodeData.set(toNodeIndex+2, currentContribution + contributionChange);
-
- progressIndicator.progress();
- }
- }
-
- if (shouldHandleDanglingNodes)
- {
- // get the rank from each of the dangling nodes
- float totalRank = 0.0f;
- for (int nodeId : danglingNodes)
- {
- int nodeIndex = nodeIndices.get(nodeId);
- float rank = nodeData.get(nodeIndex);
- totalRank += rank;
- }
-
- // distribute the dangling node ranks to all the nodes in the graph
- // note: the alpha factor is applied in the commit stage
- float contributionIncrease = totalRank / this.nodeCount;
- for (int i=2; i<nodeData.size(); i += 3)
- {
- float contribution = nodeData.getFloat(i);
- contribution += contributionIncrease;
- nodeData.set(i, contribution);
- }
- }
- }
-
- public void commit(ProgressIndicator progressIndicator)
- {
- this.totalRankChange = 0.0f;
-
- for (int id : nodeIndices.keySet())
- {
- int nodeIndex = this.nodeIndices.get(id);
-
- float alpha = datafu.linkanalysis.PageRank.ALPHA;
- float newRank = (1.0f - alpha)/nodeCount + alpha * this.nodeData.get(nodeIndex+2);
-
- this.nodeData.set(nodeIndex+2, 0.0f);
-
- float lastRankDiff = newRank - this.nodeData.get(nodeIndex);
-
- this.nodeData.set(nodeIndex, newRank);
-
- this.totalRankChange += Math.abs(lastRankDiff);
-
- progressIndicator.progress();
- }
- }
-
- private void writeEdgesToDisk() throws IOException
- {
- this.edgesFile = File.createTempFile("fastgraph", null);
-
- FileOutputStream outStream = new FileOutputStream(this.edgesFile);
- BufferedOutputStream bufferedStream = new BufferedOutputStream(outStream);
- this.edgeDataOutputStream = new DataOutputStream(bufferedStream);
-
- for (int edgeData : edges)
- {
- this.edgeDataOutputStream.writeInt(edgeData);
- }
-
- this.edges.clear();
- usingEdgeDiskCache = true;
- }
-
- private Iterator<Integer> getEdgeData() throws IOException
- {
- if (!usingEdgeDiskCache)
- {
- return this.edges.iterator();
- }
- else
- {
- FileInputStream fileInputStream = new FileInputStream(this.edgesFile);
- BufferedInputStream inputStream = new BufferedInputStream(fileInputStream);
- final DataInputStream dataInputStream = new DataInputStream(inputStream);
-
- return new AbstractIterator<Integer>() {
-
- @Override
- protected Integer computeNext()
- {
- try
- {
- return dataInputStream.readInt();
- }
- catch (IOException e)
- {
- return endOfData();
- }
- }
-
- };
- }
- }
-}
-
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java
deleted file mode 100644
index 2460fc2..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/java/datafu/pig/linkanalysis/PageRank.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Copyright 2010 LinkedIn, Inc
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package datafu.pig.linkanalysis;
-
-import it.unimi.dsi.fastutil.ints.Int2IntMap;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.pig.Accumulator;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.BagFactory;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.DataType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-import datafu.linkanalysis.PageRank.ProgressIndicator;
-
-
-/**
- * A UDF which implements {@link <a href="http://en.wikipedia.org/wiki/PageRank" target="_blank">PageRank</a>}.
- * Each graph is stored in memory while running the algorithm, with edges optionally
- * spilled to disk to conserve memory. This can be used to distribute the execution of PageRank on a large number of
- * reasonable sized graphs. It does not distribute execuion of PageRank on a single graph. Each graph is identified
- * by an integer valued topic ID.
- * <p>
- * Example:
- * <pre>
- * {@code
- *
- * topic_edges = LOAD 'input_edges' as (topic:INT,source:INT,dest:INT,weight:DOUBLE);
- *
- * topic_edges_grouped = GROUP topic_edges by (topic, source) ;
- * topic_edges_grouped = FOREACH topic_edges_grouped GENERATE
- * group.topic as topic,
- * group.source as source,
- * topic_edges.(dest,weight) as edges;
- *
- * topic_edges_grouped_by_topic = GROUP topic_edges_grouped BY topic;
- *
- * topic_ranks = FOREACH topic_edges_grouped_by_topic GENERATE
- * group as topic,
- * FLATTEN(PageRank(topic_edges_grouped.(source,edges))) as (source,rank);
- *
- * skill_ranks = FOREACH skill_ranks GENERATE
- * topic, source, rank;
- *
- * }
- * </pre>
- */
-public class PageRank extends EvalFunc<DataBag> implements Accumulator<DataBag>
-{
- private final datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-
- private int maxNodesAndEdges = 100000000;
- private int maxEdgesInMemory = 30000000;
- private double tolerance = 1e-16;
- private int maxIters = 150;
- private boolean useEdgeDiskStorage = false;
- private boolean enableDanglingNodeHandling = false;
- private boolean aborted = false;
-
- TupleFactory tupleFactory = TupleFactory.getInstance();
- BagFactory bagFactory = BagFactory.getInstance();
-
- public PageRank()
- {
- initialize();
- }
-
- public PageRank(String... parameters)
- {
- if (parameters.length % 2 != 0)
- {
- throw new RuntimeException("Invalid parameters list");
- }
-
- for (int i=0; i<parameters.length; i+=2)
- {
- String parameterName = parameters[i];
- String value = parameters[i+1];
- if (parameterName.equals("max_nodes_and_edges"))
- {
- maxNodesAndEdges = Integer.parseInt(value);
- }
- else if (parameterName.equals("max_edges_in_memory"))
- {
- maxEdgesInMemory = Integer.parseInt(value);
- }
- else if (parameterName.equals("tolerance"))
- {
- tolerance = Double.parseDouble(value);
- }
- else if (parameterName.equals("max_iters"))
- {
- maxIters = Integer.parseInt(value);
- }
- else if (parameterName.equals("spill_to_edge_disk_storage"))
- {
- useEdgeDiskStorage = Boolean.parseBoolean(value);
- }
- else if (parameterName.equals("dangling_nodes"))
- {
- enableDanglingNodeHandling = Boolean.parseBoolean(value);
- }
- }
-
- initialize();
- }
-
- private void initialize()
- {
- long heapSize = Runtime.getRuntime().totalMemory();
- long heapMaxSize = Runtime.getRuntime().maxMemory();
- long heapFreeSize = Runtime.getRuntime().freeMemory();
-// System.out.println(String.format("Heap size: %d, Max heap size: %d, Heap free size: %d", heapSize, heapMaxSize, heapFreeSize));
-
- if (useEdgeDiskStorage)
- {
- this.graph.enableEdgeDiskCaching();
- }
- else
- {
- this.graph.disableEdgeDiskCaching();
- }
-
- if (enableDanglingNodeHandling)
- {
- this.graph.enableDanglingNodeHandling();
- }
- else
- {
- this.graph.disableDanglingNodeHandling();
- }
-
- this.graph.setEdgeCachingThreshold(maxEdgesInMemory);
- }
-
- @Override
- public void accumulate(Tuple t) throws IOException
- {
- if (aborted)
- {
- return;
- }
-
- DataBag bag = (DataBag) t.get(0);
- if (bag == null || bag.size() == 0)
- return;
-
- for (Tuple sourceTuple : bag)
- {
- Integer sourceId = (Integer)sourceTuple.get(0);
- DataBag edges = (DataBag)sourceTuple.get(1);
-
- ArrayList<Map<String,Object>> edgesMapList = new ArrayList<Map<String, Object>>();
-
- for (Tuple edgeTuple : edges)
- {
- Integer destId = (Integer)edgeTuple.get(0);
- Double weight = (Double)edgeTuple.get(1);
- HashMap<String,Object> edgeMap = new HashMap<String, Object>();
- edgeMap.put("dest",destId);
- edgeMap.put("weight",weight);
- edgesMapList.add(edgeMap);
- }
-
- graph.addEdges(sourceId, edgesMapList);
-
- if (graph.nodeCount() + graph.edgeCount() > maxNodesAndEdges)
- {
- System.out.println(String.format("There are too many nodes and edges (%d + %d > %d). Aborting.", graph.nodeCount(), graph.edgeCount(), maxNodesAndEdges));
- aborted = true;
- }
-
- reporter.progress();
- }
- }
-
- @Override
- public DataBag getValue()
- {
- if (aborted)
- {
- return null;
- }
-
- System.out.println(String.format("Nodes: %d, Edges: %d", graph.nodeCount(), graph.edgeCount()));
-
- ProgressIndicator progressIndicator = getProgressIndicator();
- System.out.println("Finished loading graph.");
- long startTime = System.nanoTime();
- System.out.println("Initializing.");
- try
- {
- graph.init(progressIndicator);
- }
- catch (IOException e)
- {
- e.printStackTrace();
- return null;
- }
- System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
-
- float totalDiff;
- int iter = 0;
-
- System.out.println("Beginning iterations");
- startTime = System.nanoTime();
- do
- {
- // TODO log percentage complete every 5 minutes
- try
- {
- totalDiff = graph.nextIteration(progressIndicator);
- }
- catch (IOException e)
- {
- e.printStackTrace();
- return null;
- }
- iter++;
- } while(iter < maxIters && totalDiff > tolerance);
- System.out.println(String.format("Done, %d iterations took %f ms", iter, (System.nanoTime() - startTime)/10.0e6));
-
- DataBag output = bagFactory.newDefaultBag();
-
- for (Int2IntMap.Entry node : graph.getNodeIds())
- {
- int nodeId = node.getIntKey();
- float rank = graph.getNodeRank(nodeId);
- List nodeData = new ArrayList(2);
- nodeData.add(nodeId);
- nodeData.add(rank);
- output.add(tupleFactory.newTuple(nodeData));
- }
-
- return output;
- }
-
- @Override
- public void cleanup()
- {
- try
- {
- aborted = false;
- this.graph.clear();
- }
- catch (IOException e)
- {
- e.printStackTrace();
- }
- }
-
- @Override
- public DataBag exec(Tuple input) throws IOException
- {
- try
- {
- accumulate(input);
-
- return getValue();
- }
- finally
- {
- cleanup();
- }
- }
-
- private ProgressIndicator getProgressIndicator()
- {
- return new ProgressIndicator()
- {
- @Override
- public void progress()
- {
- reporter.progress();
- }
- };
- }
-
- @Override
- public Schema outputSchema(Schema input)
- {
- try
- {
- Schema.FieldSchema inputFieldSchema = input.getField(0);
-
- if (inputFieldSchema.type != DataType.BAG)
- {
- throw new RuntimeException("Expected a BAG as input");
- }
-
- Schema inputBagSchema = inputFieldSchema.schema;
-
- if (inputBagSchema.getField(0).type != DataType.TUPLE)
- {
- throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
- DataType.findTypeName(inputBagSchema.getField(0).type)));
- }
-
- Schema inputTupleSchema = inputBagSchema.getField(0).schema;
-
- if (inputTupleSchema.getField(0).type != DataType.INTEGER)
- {
- throw new RuntimeException(String.format("Expected source to be an INTEGER, but instead found %s",
- DataType.findTypeName(inputTupleSchema.getField(0).type)));
- }
-
- if (inputTupleSchema.getField(1).type != DataType.BAG)
- {
- throw new RuntimeException(String.format("Expected edges to be represented with a BAG"));
- }
-
- Schema.FieldSchema edgesFieldSchema = inputTupleSchema.getField(1);
-
- if (edgesFieldSchema.schema.getField(0).type != DataType.TUPLE)
- {
- throw new RuntimeException(String.format("Expected edges field to contain a TUPLE, but instead found %s",
- DataType.findTypeName(edgesFieldSchema.schema.getField(0).type)));
- }
-
- Schema edgesTupleSchema = edgesFieldSchema.schema.getField(0).schema;
-
- if (edgesTupleSchema.getField(0).type != DataType.INTEGER)
- {
- throw new RuntimeException(String.format("Expected destination edge ID to an INTEGER, but instead found %s",
- DataType.findTypeName(edgesFieldSchema.schema.getField(0).type)));
- }
-
- if (edgesTupleSchema.getField(1).type != DataType.DOUBLE)
- {
- throw new RuntimeException(String.format("Expected destination edge weight to a DOUBLE, but instead found %s",
- DataType.findTypeName(edgesFieldSchema.schema.getField(1).type)));
- }
-
- Schema tupleSchema = new Schema();
- tupleSchema.add(new Schema.FieldSchema("node",DataType.INTEGER));
- tupleSchema.add(new Schema.FieldSchema("rank",DataType.FLOAT));
-
- return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
- .getName()
- .toLowerCase(), input),
- tupleSchema,
- DataType.BAG));
- }
- catch (FrontendException e)
- {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java
deleted file mode 100644
index 8b11111..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/PigTests.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu;
-
-import static org.junit.Assert.*;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileWriter;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.apache.pig.tools.parameters.ParseException;
-
-public abstract class PigTests
-{
- protected String[] getDefaultArgs()
- {
- String[] args = {
- "JAR_PATH=" + getJarPath()
- };
- return args;
- }
-
- protected List<String> getDefaultArgsAsList()
- {
- String[] args = getDefaultArgs();
- List<String> argsList = new ArrayList<String>(args.length);
- for (String arg : args)
- {
- argsList.add(arg);
- }
- return argsList;
- }
-
- protected PigTest createPigTest(String scriptPath, String... args) throws IOException
- {
- // append args to list of default args
- List<String> theArgs = getDefaultArgsAsList();
- for (String arg : args)
- {
- theArgs.add(arg);
- }
-
- String[] lines = getLinesFromFile(scriptPath);
-
- for (String arg : theArgs)
- {
- String[] parts = arg.split("=",2);
- if (parts.length == 2)
- {
- for (int i=0; i<lines.length; i++)
- {
- lines[i] = lines[i].replaceAll(Pattern.quote("$" + parts[0]), parts[1]);
- }
- }
- }
-
- return new PigTest(lines);
- }
-
- protected PigTest createPigTest(String scriptPath) throws IOException
- {
- return createPigTest(scriptPath, getDefaultArgs());
- }
-
- protected String getJarPath()
- {
- String jarDir = "dist";
- if (System.getProperty("datafu.jar.dir") != null)
- {
- jarDir = System.getProperty("datafu.jar.dir");
- }
-
- String jarDirPath = new File(/* System.getProperty("user.dir"), */ jarDir).getAbsolutePath();
-
- File userDir = new File(jarDirPath);
-
- String[] files = userDir.list(new FilenameFilter() {
-
- @Override
- public boolean accept(File dir, String name)
- {
- return name.startsWith("datafu") && name.endsWith(".jar") && !name.contains("sources") && !name.contains("javadoc");
- }
-
- });
-
- if (files.length == 0)
- {
- throw new RuntimeException("Could not find JAR file");
- }
- else if (files.length > 1)
- {
- throw new RuntimeException("Found more JAR files than expected");
- }
-
- return userDir.getAbsolutePath() + "/" + files[0];
- }
-
- protected List<Tuple> getLinesForAlias(PigTest test, String alias) throws IOException, ParseException
- {
- return getLinesForAlias(test,alias,true);
- }
-
- protected List<Tuple> getLinesForAlias(PigTest test, String alias, boolean logValues) throws IOException, ParseException
- {
- Iterator<Tuple> tuplesIterator = test.getAlias(alias);
- List<Tuple> tuples = new ArrayList<Tuple>();
- if (logValues)
- {
- System.out.println(String.format("Values for %s: ", alias));
- }
- while (tuplesIterator.hasNext())
- {
- Tuple tuple = tuplesIterator.next();
- if (logValues)
- {
- System.out.println(tuple.toString());
- }
- tuples.add(tuple);
- }
- return tuples;
- }
-
- protected void writeLinesToFile(String fileName, String... lines) throws IOException
- {
- File inputFile = deleteIfExists(getFile(fileName));
- writeLinesToFile(inputFile, lines);
- }
-
- protected void writeLinesToFile(File file, String[] lines) throws IOException
- {
- FileWriter writer = new FileWriter(file);
- for (String line : lines)
- {
- writer.write(line + "\n");
- }
- writer.close();
- }
-
- protected void assertOutput(PigTest test, String alias, String... expected) throws IOException, ParseException
- {
- List<Tuple> tuples = getLinesForAlias(test, alias);
- assertEquals(expected.length, tuples.size());
- int i=0;
- for (String e : expected)
- {
- assertEquals(e, tuples.get(i++).toString());
- }
- }
-
- protected File deleteIfExists(File file)
- {
- if (file.exists())
- {
- file.delete();
- }
- return file;
- }
-
- protected File getFile(String fileName)
- {
- return new File(System.getProperty("user.dir"), fileName).getAbsoluteFile();
- }
-
- /**
- * Gets the lines from a given file.
- *
- * @param relativeFilePath The path relative to the datafu-tests project.
- * @return The lines from the file
- * @throws IOException
- */
- protected String[] getLinesFromFile(String relativeFilePath) throws IOException
- {
- // assume that the working directory is the datafu-tests project
- File file = new File(System.getProperty("user.dir"), relativeFilePath).getAbsoluteFile();
- BufferedInputStream content = new BufferedInputStream(new FileInputStream(file));
- Object[] lines = IOUtils.readLines(content).toArray();
- String[] result = new String[lines.length];
- for (int i=0; i<lines.length; i++)
- {
- result[i] = (String)lines[i];
- }
- return result;
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java
deleted file mode 100644
index 8e72846..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/BagTests.java
+++ /dev/null
@@ -1,308 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.bags;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-
-public class BagTests extends PigTests
-{
- @Test
- public void nullToEmptyBagTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/nullToEmptyBagTest.pig");
-
- writeLinesToFile("input",
- "({(1),(2),(3),(4),(5)})",
- "()",
- "{(4),(5)})");
-
- test.runScript();
-
- assertOutput(test, "data2",
- "({(1),(2),(3),(4),(5)})",
- "({})",
- "({(4),(5)})");
- }
-
- @Test
- public void appendToBagTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/appendToBagTest.pig");
-
- writeLinesToFile("input",
- "1\t{(1),(2),(3)}\t(4)",
- "2\t{(10),(20),(30),(40),(50)}\t(60)");
-
- test.runScript();
-
- assertOutput(test, "data2",
- "(1,{(1),(2),(3),(4)})",
- "(2,{(10),(20),(30),(40),(50),(60)})");
- }
-
- @Test
- public void firstTupleFromBagTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/firstTupleFromBagTest.pig");
-
- writeLinesToFile("input", "1\t{(4),(9),(16)}");
-
- test.runScript();
-
- assertOutput(test, "data2", "(1,(4))");
- }
-
-
- @Test
- public void prependToBagTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/prependToBagTest.pig");
-
- writeLinesToFile("input",
- "1\t{(1),(2),(3)}\t(4)",
- "2\t{(10),(20),(30),(40),(50)}\t(60)");
-
- test.runScript();
-
- assertOutput(test, "data2",
- "(1,{(4),(1),(2),(3)})",
- "(2,{(60),(10),(20),(30),(40),(50)})");
- }
-
- @Test
- public void bagConcatTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/bagConcatTest.pig");
-
- writeLinesToFile("input",
- "({(1),(2),(3)}\t{(3),(5),(6)}\t{(10),(13)})",
- "({(2),(3),(4)}\t{(5),(5)}\t{(20)})");
-
- test.runScript();
-
- assertOutput(test, "data2",
- "({(1),(2),(3),(3),(5),(6),(10),(13)})",
- "({(2),(3),(4),(5),(5),(20)})");
- }
-
- @Test
- public void unorderedPairsTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/unorderedPairsTests.pig");
-
- String[] input = {
- "{(1),(2),(3),(4),(5)}"
- };
-
- String[] output = {
- "(1,2)",
- "(1,3)",
- "(1,4)",
- "(1,5)",
- "(2,3)",
- "(2,4)",
- "(2,5)",
- "(3,4)",
- "(3,5)",
- "(4,5)"
- };
-
- test.assertOutput("data",input,"data4",output);
- }
-
- @Test
- public void unorderedPairsTest2() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/unorderedPairsTests2.pig");
-
- this.writeLinesToFile("input", "1\t{(1),(2),(3),(4),(5)}");
-
- String[] output = {
- "(1,2)",
- "(1,3)",
- "(1,4)",
- "(1,5)",
- "(2,3)",
- "(2,4)",
- "(2,5)",
- "(3,4)",
- "(3,5)",
- "(4,5)"
- };
-
- test.runScript();
- this.getLinesForAlias(test, "data3");
-
- this.assertOutput(test, "data3",
- "(1,(1),(2))",
- "(1,(1),(3))",
- "(1,(1),(4))",
- "(1,(1),(5))",
- "(1,(2),(3))",
- "(1,(2),(4))",
- "(1,(2),(5))",
- "(1,(3),(4))",
- "(1,(3),(5))",
- "(1,(4),(5))");
- }
-
- @Test
- public void bagSplitTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/bagSplitTest.pig",
- "MAX=5");
-
- writeLinesToFile("input",
- "{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");
-
- test.runScript();
-
- assertOutput(test, "data3",
- "({(1,11),(2,22),(3,33),(4,44),(5,55)})",
- "({(6,66),(7,77),(8,88),(9,99),(10,1010)})",
- "({(11,1111),(12,1212)})");
- }
-
- @Test
- public void bagSplitWithBagNumTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/bagSplitWithBagNumTest.pig",
- "MAX=10");
-
- writeLinesToFile("input",
- "{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");
-
- test.runScript();
-
- assertOutput(test, "data3",
- "({(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010)},0)",
- "({(11,1111),(12,1212)},1)");
- }
-
- @Test
- public void enumerateWithReverseTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/enumerateWithReverseTest.pig");
-
- writeLinesToFile("input",
- "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-
- test.runScript();
-
- assertOutput(test, "data4",
- "(10,{(1),(2),(3)},5)",
- "(20,{(4),(5),(6)},4)",
- "(30,{(7),(8)},3)",
- "(40,{(9),(10),(11)},2)",
- "(50,{(12),(13),(14),(15)},1)");
- }
-
- @Test
- public void enumerateWithStartTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/enumerateWithStartTest.pig");
-
- writeLinesToFile("input",
- "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-
- test.runScript();
-
- assertOutput(test, "data4",
- "(10,{(1),(2),(3)},1)",
- "(20,{(4),(5),(6)},2)",
- "(30,{(7),(8)},3)",
- "(40,{(9),(10),(11)},4)",
- "(50,{(12),(13),(14),(15)},5)");
- }
-
- @Test
- public void enumerateTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/enumerateTest.pig");
-
- writeLinesToFile("input",
- "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-
- test.runScript();
-
- assertOutput(test, "data4",
- "(10,{(1),(2),(3)},0)",
- "(20,{(4),(5),(6)},1)",
- "(30,{(7),(8)},2)",
- "(40,{(9),(10),(11)},3)",
- "(50,{(12),(13),(14),(15)},4)");
- }
-
- @Test
- public void comprehensiveBagSplitAndEnumerate() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/comprehensiveBagSplitAndEnumerate.pig");
-
- writeLinesToFile("input",
- "({(A,1.0),(B,2.0),(C,3.0),(D,4.0),(E,5.0)})");
-
- test.runScript();
-
- assertOutput(test, "data_out",
- // bag #1
- "(A,1.0,1)",
- "(B,2.0,1)",
- "(C,3.0,1)",
- // bag #2
- "(D,4.0,2)",
- "(E,5.0,2)");
- }
-
- @Test
- public void aliasBagFieldsTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/aliasBagFieldsTest.pig");
-
- writeLinesToFile("input",
- "({(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})");
-
- test.runScript();
-
- assertOutput(test, "data4",
- "(A,1)",
- "(B,2)",
- "(C,3)",
- "(D,4)",
- "(E,5)");
- }
-
- @Test
- public void distinctByTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/distinctByTest.pig");
-
- writeLinesToFile("input",
- "({(Z,1,0),(A,1,0),(A,1,0),(B,2,0),(B,22,1),(C,3,0),(D,4,0),(E,5,0)})");
-
- test.runScript();
-
- assertOutput(test, "data2",
- "({(Z,1,0),(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})");
- }
-
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java
deleted file mode 100644
index 938ef3a..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/bags/sets/SetTests.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.bags.sets;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class SetTests extends PigTests
-{
- @Test
- public void setIntersectTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/sets/setIntersectTest.pig");
-
- String[] input = {
- "{(1,10),(2,20),(3,30),(4,40),(5,50),(6,60)}\t{(0,0),(2,20),(4,40),(8,80)}",
- "{(1,10),(1,10),(2,20),(3,30),(3,30),(4,40),(4,40)}\t{(1,10),(3,30)}"
- };
-
- String[] output = {
- "({(2,20),(4,40)})",
- "({(1,10),(3,30)})"
- };
-
- test.assertOutput("data",input,"data2",output);
- }
-
- @Test
- public void setIntersectOutOfOrderTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/sets/setIntersectTest.pig");
-
- this.writeLinesToFile("input",
- "{(1,10),(3,30),(2,20),(4,40),(5,50),(6,60)}\t{(0,0),(2,20),(4,40),(8,80)}");
-
- test.runScript();
-
- this.getLinesForAlias(test, "data2");
- }
-
- @Test
- public void setUnionTest() throws Exception
- {
- PigTest test = createPigTest("datafu/bags/sets/setUnionTest.pig");
-
- String[] input = {
- "{(1,10),(1,20),(1,30),(1,40),(1,50),(1,60),(1,80)}\t{(1,1),(1,20),(1,25),(1,25),(1,25),(1,40),(1,70),(1,80)}"
- };
-
- String[] output = {
- "({(1,10),(1,20),(1,30),(1,40),(1,50),(1,60),(1,80),(1,1),(1,25),(1,70)})"
- };
-
- test.assertOutput("data",input,"data2",output);
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java
deleted file mode 100644
index 87fab7b..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/date/TimeTests.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.date;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class TimeTests extends PigTests
-{
- @Test
- public void timeCountPageViewsTest() throws Exception
- {
- PigTest test = createPigTest("datafu/date/timeCountPageViewsTest.pig",
- "TIME_WINDOW=30m",
- "JAR_PATH=" + getJarPath());
-
- String[] input = {
- "1\t100\t2010-01-01T01:00:00Z",
- "1\t100\t2010-01-01T01:15:00Z",
- "1\t100\t2010-01-01T01:31:00Z",
- "1\t100\t2010-01-01T01:35:00Z",
- "1\t100\t2010-01-01T02:30:00Z",
-
- "1\t101\t2010-01-01T01:00:00Z",
- "1\t101\t2010-01-01T01:31:00Z",
- "1\t101\t2010-01-01T02:10:00Z",
- "1\t101\t2010-01-01T02:40:30Z",
- "1\t101\t2010-01-01T03:30:00Z",
-
- "1\t102\t2010-01-01T01:00:00Z",
- "1\t102\t2010-01-01T01:01:00Z",
- "1\t102\t2010-01-01T01:02:00Z",
- "1\t102\t2010-01-01T01:10:00Z",
- "1\t102\t2010-01-01T01:15:00Z",
- "1\t102\t2010-01-01T01:25:00Z",
- "1\t102\t2010-01-01T01:30:00Z"
- };
-
- String[] output = {
- "(1,100,2)",
- "(1,101,5)",
- "(1,102,1)"
- };
-
- test.assertOutput("views",input,"view_counts",output);
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java
deleted file mode 100644
index 12d9f97..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/geo/GeoTests.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.geo;
-
-import static org.junit.Assert.*;
-
-import java.util.List;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class GeoTests extends PigTests
-{
- @Test
- public void haversineTest() throws Exception
- {
- PigTest test = createPigTest("datafu/geo/haversineTest.pig");
-
- // Approximate latitude and longitude for major cities from maps.google.com
- double[] la = {34.040143,-118.243103};
- double[] tokyo = {35.637209,139.65271};
- double[] ny = {40.716038,-73.99498};
- double[] paris = {48.857713,2.342491};
- double[] sydney = {-33.872696,151.195221};
-
- this.writeLinesToFile("input",
- coords(la,tokyo),
- coords(ny,tokyo),
- coords(ny,sydney),
- coords(ny,paris));
-
- test.runScript();
-
- List<Tuple> distances = this.getLinesForAlias(test, "data2");
-
- // ensure distance is within 20 miles of expected (distances found online)
- assertWithin(5478.0, distances.get(0), 20.0); // la <-> tokyo
- assertWithin(6760.0, distances.get(1), 20.0); // ny <-> tokyo
- assertWithin(9935.0, distances.get(2), 20.0); // ny <-> sydney
- assertWithin(3635.0, distances.get(3), 20.0); // ny <-> paris
-
- }
-
- private void assertWithin(double expected, Tuple actual, double maxDiff) throws Exception
- {
- Double actualVal = (Double)actual.get(0);
- assertTrue(Math.abs(expected-actualVal) < maxDiff);
- }
-
- private String coords(double[] coords1, double[] coords2)
- {
- assertTrue(coords1.length == 2);
- assertTrue(coords2.length == 2);
- return String.format("%f\t%f\t%f\t%f", coords1[0], coords1[1], coords2[0], coords2[1]);
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java
deleted file mode 100644
index e900c15..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/hash/HashTests.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.hash;
-
-import org.apache.pig.pigunit.PigTest;
-import org.junit.Test;
-
-import org.apache.bigtop.itest.datafu.PigTests;
-
-public class HashTests extends PigTests
-{
- @Test
- public void md5Test() throws Exception
- {
- PigTest test = createPigTest("datafu/hash/md5Test.pig");
-
- writeLinesToFile("input",
- "ladsljkasdglk",
- "lkadsljasgjskdjks",
- "aladlasdgjks");
-
- test.runScript();
-
- assertOutput(test, "data_out",
- "(d9a82575758bb4978949dc0659205cc6)",
- "(9ec37f02fae0d8d6a7f4453a62272f1f)",
- "(cb94139a8b9f3243e68a898ec6bd9b3d)");
- }
-
- @Test
- public void md5Base64Test() throws Exception
- {
- PigTest test = createPigTest("datafu/hash/md5Base64Test.pig");
-
- writeLinesToFile("input",
- "ladsljkasdglk",
- "lkadsljasgjskdjks",
- "aladlasdgjks");
-
- test.runScript();
-
- assertOutput(test, "data_out",
- "(2agldXWLtJeJSdwGWSBcxg==)",
- "(nsN/Avrg2Nan9EU6YicvHw==)",
- "(y5QTmoufMkPmiomOxr2bPQ==)");
- }
-}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/4cee56bd/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java
----------------------------------------------------------------------
diff --git a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java b/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java
deleted file mode 100644
index 407815c..0000000
--- a/bigtop-tests/test-artifacts/datafu/src/main/groovy/org/apache/bigtop/itest/datafu/linkanalysis/PageRankTest.java
+++ /dev/null
@@ -1,299 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.itest.datafu.linkanalysis;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.junit.Test;
-
-public class PageRankTest
-{
- @Test
- public void wikipediaGraphInMemoryTest() throws Exception {
- System.out.println();
- System.out.println("Starting wikipediaGraphInMemoryTest");
-
- datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-
- String[] edges = getWikiExampleEdges();
-
- Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-
- // Without dangling node handling we will not get the true page rank since the total rank will
- // not add to 1.0. Without dangling node handling some of the page rank drains out of the graph.
- graph.enableDanglingNodeHandling();
-
- performIterations(graph, 150, 1e-18f);
-
- String[] expectedRanks = getWikiExampleExpectedRanks();
-
- Map<String,Float> expectedRanksMap = parseExpectedRanks(expectedRanks);
-
- validateExpectedRanks(graph, nodeIdsMap, expectedRanksMap);
- }
-
- @Test
- public void wikipediaGraphDiskCacheTest() throws Exception {
- System.out.println();
- System.out.println("Starting wikipediaGraphDiskCacheTest");
-
- datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-
- String[] edges = getWikiExampleEdges();
-
- graph.enableEdgeDiskCaching();
- graph.setEdgeCachingThreshold(5);
-
- Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-
- assert graph.isUsingEdgeDiskCache() : "Expected disk cache to be used";
-
- // Without dangling node handling we will not get the true page rank since the total rank will
- // not add to 1.0. Without dangling node handling some of the page rank drains out of the graph.
- graph.enableDanglingNodeHandling();
-
- performIterations(graph, 150, 1e-18f);
-
- String[] expectedRanks = getWikiExampleExpectedRanks();
-
- Map<String,Float> expectedRanksMap = parseExpectedRanks(expectedRanks);
-
- validateExpectedRanks(graph, nodeIdsMap, expectedRanksMap);
- }
-
- @Test
- public void hubAndSpokeInMemoryTest() throws Exception {
- System.out.println();
- System.out.println("Starting hubAndSpokeInMemoryTest");
-
- datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-
- String[] edges = getHubAndSpokeEdges();
-
- Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-
- graph.enableDanglingNodeHandling();
-
- performIterations(graph, 150, 1e-18f);
-
- // no need to validate, this is just a perf test for runtime comparison
- }
-
- @Test
- public void hubAndSpokeDiskCacheTest() throws Exception {
- System.out.println();
- System.out.println("Starting hubAndSpokeDiskCacheTest");
-
- datafu.linkanalysis.PageRank graph = new datafu.linkanalysis.PageRank();
-
- String[] edges = getHubAndSpokeEdges();
-
- graph.enableEdgeDiskCaching();
- graph.setEdgeCachingThreshold(5);
-
- Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-
- graph.enableDanglingNodeHandling();
-
- performIterations(graph, 150, 1e-18f);
-
- // no need to validate, this is just a perf test for runtime comparison
- }
-
- private String[] getHubAndSpokeEdges()
- {
- int count = 50000;
- String[] edges = new String[count];
-
- for (int i=0; i<count; i++)
- {
- edges[i] = String.format("S%d H", i);
- }
- return edges;
- }
-
- public static String[] getWikiExampleEdges()
- {
- // graph taken from:
- // http://en.wikipedia.org/wiki/PageRank
- String[] edges = {
- "B C",
- "C B",
- "D A",
- "D B",
- "E D",
- "E B",
- "E F",
- "F E",
- "F B",
- "P1 B",
- "P1 E",
- "P2 B",
- "P2 E",
- "P3 B",
- "P3 E",
- "P4 E",
- "P5 E"
- };
- return edges;
- }
-
- public static String[] getWikiExampleExpectedRanks()
- {
- // these ranks come from the Wikipedia page:
- // http://en.wikipedia.org/wiki/PageRank
- String[] expectedRanks = {
- "A 3.3",
- "B 38.4",
- "C 34.3",
- "D 3.9",
- "E 8.1",
- "F 3.9",
- "P1 1.6",
- "P2 1.6",
- "P3 1.6",
- "P4 1.6",
- "P5 1.6"
- };
- return expectedRanks;
- }
-
- private Map<String,Integer> loadGraphFromEdgeList(datafu.linkanalysis.PageRank graph, String[] edges) throws IOException
- {
- Map<Integer,ArrayList<Map<String,Object>>> nodeEdgesMap = new HashMap<Integer,ArrayList<Map<String,Object>>>();
- Map<String,Integer> nodeIdsMap = new HashMap<String,Integer>();
-
- for (String edge : edges)
- {
- String[] parts = edge.split(" ");
- assert parts.length == 2 : "Expected two parts";
-
- int sourceId = getOrCreateId(parts[0], nodeIdsMap);
- int destId = getOrCreateId(parts[1], nodeIdsMap);
-
- Map<String,Object> edgeMap = new HashMap<String,Object>();
- edgeMap.put("weight", 1.0);
- edgeMap.put("dest", destId);
-
- ArrayList<Map<String,Object>> nodeEdges = null;
-
- if (nodeEdgesMap.containsKey(sourceId))
- {
- nodeEdges = nodeEdgesMap.get(sourceId);
- }
- else
- {
- nodeEdges = new ArrayList<Map<String,Object>>();
- nodeEdgesMap.put(sourceId, nodeEdges);
- }
-
- nodeEdges.add(edgeMap);
- }
-
- for (Map.Entry<Integer, ArrayList<Map<String,Object>>> e : nodeEdgesMap.entrySet())
- {
- graph.addEdges(e.getKey(), e.getValue());
- }
-
- return nodeIdsMap;
- }
-
- private void performIterations(datafu.linkanalysis.PageRank graph, int maxIters, float tolerance) throws IOException
- {
- System.out.println(String.format("Beginning iteration (maxIters = %d, tolerance=%e)", maxIters, tolerance));
-
- datafu.linkanalysis.PageRank.ProgressIndicator progressIndicator = getDummyProgressIndicator();
-
- System.out.println("Initializing graph");
- long startTime = System.nanoTime();
- graph.init(progressIndicator);
- System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
-
- float totalDiff;
- int iter = 0;
-
- System.out.println("Beginning iterations");
- startTime = System.nanoTime();
- do
- {
- totalDiff = graph.nextIteration(progressIndicator);
- iter++;
- } while(iter < maxIters && totalDiff > tolerance);
- System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
- }
-
- private datafu.linkanalysis.PageRank.ProgressIndicator getDummyProgressIndicator()
- {
- return new datafu.linkanalysis.PageRank.ProgressIndicator()
- {
- @Override
- public void progress()
- {
- // do nothing
- }
- };
- }
-
- private void validateExpectedRanks(datafu.linkanalysis.PageRank graph, Map<String,Integer> nodeIds, Map<String,Float> expectedRanks)
- {
- System.out.println("Validating page rank results");
-
- for (Map.Entry<String,Integer> e : nodeIds.entrySet())
- {
- float rank = graph.getNodeRank(e.getValue());
-
- float expectedRank = expectedRanks.get(e.getKey());
- // require 0.1% accuracy
- assert (Math.abs(expectedRank - rank*100.0f) < 0.1) : String.format("Did not get expected rank for %s", e.getKey());
- }
-
- System.out.println("All ranks match expected");
- }
-
- public static Map<String,Float> parseExpectedRanks(String[] expectedRanks)
- {
- Map<String,Float> expectedRanksMap = new HashMap<String,Float>();
- for (String expectedRankString : expectedRanks)
- {
- String[] parts = expectedRankString.split(" ");
- assert parts.length == 2 : "Expected two parts";
- String name = parts[0];
- Float expectedRank = Float.parseFloat(parts[1]);
- expectedRanksMap.put(name, expectedRank);
- }
- return expectedRanksMap;
- }
-
- private Integer getOrCreateId(String name, Map<String,Integer> nodeIds)
- {
- if (nodeIds.containsKey(name))
- {
- return nodeIds.get(name);
- }
- else
- {
- Integer id = nodeIds.size();
- nodeIds.put(name, id);
- return id;
- }
- }
-}