You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2017/08/03 19:04:27 UTC
incubator-datafu git commit: DATAFU-123 Allow DataFu to include macros
Repository: incubator-datafu
Updated Branches:
refs/heads/master c93acb64d -> 5b648e2fb
DATAFU-123 Allow DataFu to include macros
Signed-off-by: Matthew Hayes <mh...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/5b648e2f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/5b648e2f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/5b648e2f
Branch: refs/heads/master
Commit: 5b648e2fbb6f1305e6252805d3a1b5421e0a806d
Parents: c93acb6
Author: Eyal Allweil <ea...@paypal.com>
Authored: Thu Aug 3 12:02:57 2017 -0700
Committer: Matthew Hayes <mh...@apache.org>
Committed: Thu Aug 3 12:02:57 2017 -0700
----------------------------------------------------------------------
.../src/main/resources/datafu/count_macros.pig | 29 ++++++
.../src/test/java/datafu/test/pig/PigTests.java | 9 +-
.../java/datafu/test/pig/macros/MacroTests.java | 102 +++++++++++++++++++
3 files changed, 137 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/5b648e2f/datafu-pig/src/main/resources/datafu/count_macros.pig
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/resources/datafu/count_macros.pig b/datafu-pig/src/main/resources/datafu/count_macros.pig
new file mode 100644
index 0000000..9bebce4
--- /dev/null
+++ b/datafu-pig/src/main/resources/datafu/count_macros.pig
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+-- Counts the rows of a relation: every row of $alias is gathered into a
+-- single group, and COUNT is applied to the resulting bag.
+-- NOTE(review): Pig's COUNT is documented to skip tuples whose first field
+-- is null -- confirm that this is the intended behavior for callers.
+DEFINE count_all_non_distinct(alias) returns res {
+ all_rows = GROUP $alias ALL;
+ $res = FOREACH all_rows GENERATE COUNT($alias);
+};
+
+-- Counts the distinct values of $key in $alias: projects the key column,
+-- removes duplicates, then delegates the counting to count_all_non_distinct.
+DEFINE count_distinct_keys(alias, key) returns res {
+ keys_only = FOREACH $alias GENERATE $key;
+ unique_keys = DISTINCT keys_only;
+ $res = count_all_non_distinct(unique_keys);
+};
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/5b648e2f/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/PigTests.java b/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
index 6fa500d..b869492 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
@@ -54,6 +54,8 @@ public abstract class PigTests
Logger.getRootLogger().removeAllAppenders();
Logger.getLogger(JvmMetrics.class).setLevel(Level.OFF);
+ System.setProperty("pig.import.search.path", System.getProperty("user.dir") + File.separator + "src" + File.separator + "main" + File.separator + "resources");
+
// Test files will be created in the following sub-directory
new File(System.getProperty("user.dir") + File.separator + "build", "test-files").mkdir();
}
@@ -233,14 +235,15 @@ public abstract class PigTests
protected void assertOutput(PigTest test, String alias, String... expected) throws IOException, ParseException
{
List<Tuple> tuples = getLinesForAlias(test, alias);
- assertEquals(expected.length, tuples.size());
+ assertEquals(expected.length, tuples.size(), "Mismatch in number of tuples");
int i=0;
for (String e : expected)
{
- assertEquals(tuples.get(i++).toString(), e);
+ String actual = tuples.get(i++).toString();
+ assertEquals(actual, e, "Expected " + e + " but found " + actual);
}
}
-
+
protected File deleteIfExists(File file)
{
if (file.exists())
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/5b648e2f/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java b/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java
new file mode 100644
index 0000000..17d0af5
--- /dev/null
+++ b/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package datafu.test.pig.macros;
+
+import org.adrianwalker.multilinestring.Multiline;
+import org.apache.pig.pigunit.PigTest;
+import org.testng.annotations.Test;
+
+import datafu.test.pig.PigTests;
+
+/**
+ * Tests for the Pig macros imported from datafu/count_macros.pig.
+ *
+ * Each Pig script is written in the comment directly above a field annotated
+ * with Multiline, and the field is what gets passed to createPigTestFromString.
+ * NOTE(review): presumably the Multiline annotation processor copies that
+ * comment text into the field, so those comments must stay valid Pig scripts
+ * and must not be reworded -- confirm against the multiline-string library.
+ */
+public class MacroTests extends PigTests
+{
+ /**
+
+ import 'datafu/count_macros.pig';
+
+ data = LOAD 'input' AS (id:chararray, num:int);
+
+ cnt = count_distinct_keys(data, 'id');
+
+ STORE cnt INTO 'output';
+
+ */
+ @Multiline
+ private String countDistinctTest;
+
+ @Test
+ public void countDistinctTest() throws Exception
+ {
+ PigTest test = createPigTestFromString(countDistinctTest);
+
+ writeSampleInput();
+
+ test.runScript();
+
+ // The sample input contains ten distinct ids (A1 through A10).
+ assertOutput(test, "cnt", "(10)");
+ }
+
+ /**
+
+ import 'datafu/count_macros.pig';
+
+ data = LOAD 'input' AS (id:chararray, num:int);
+
+ cnt = count_all_non_distinct(data);
+
+ STORE cnt INTO 'output';
+
+ */
+ @Multiline
+ private String countTest;
+
+ @Test
+ public void countTest() throws Exception
+ {
+ PigTest test = createPigTestFromString(countTest);
+
+ writeSampleInput();
+
+ test.runScript();
+
+ // The sample input contains 31 rows in total.
+ assertOutput(test, "cnt", "(31)");
+ }
+
+ // Writes the fixture shared by both tests to the file 'input': 31
+ // tab-separated (id, num) rows spread over the ten ids A1 through A10.
+ private void writeSampleInput() throws Exception
+ {
+ writeLinesToFile("input",
+ "A1\t1","A1\t4","A1\t4","A1\t4",
+ "A2\t4","A2\t4",
+ "A3\t3","A3\t1","A3\t77",
+ "A4\t3","A4\t3","A4\t59","A4\t29",
+ "A5\t4",
+ "A6\t3","A6\t55","A6\t1",
+ "A7\t39","A7\t27","A7\t85",
+ "A8\t4","A8\t45",
+ "A9\t92", "A9\t42","A9\t1","A9\t0",
+ "A10\t7","A10\t23","A10\t1","A10\t41","A10\t52");
+ }
+
+}