You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2017/08/03 19:04:27 UTC

incubator-datafu git commit: DATAFU-123 Allow DataFu to include macros

Repository: incubator-datafu
Updated Branches:
  refs/heads/master c93acb64d -> 5b648e2fb


DATAFU-123 Allow DataFu to include macros

Signed-off-by: Matthew Hayes <mh...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/5b648e2f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/5b648e2f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/5b648e2f

Branch: refs/heads/master
Commit: 5b648e2fbb6f1305e6252805d3a1b5421e0a806d
Parents: c93acb6
Author: Eyal Allweil <ea...@paypal.com>
Authored: Thu Aug 3 12:02:57 2017 -0700
Committer: Matthew Hayes <mh...@apache.org>
Committed: Thu Aug 3 12:02:57 2017 -0700

----------------------------------------------------------------------
 .../src/main/resources/datafu/count_macros.pig  |  29 ++++++
 .../src/test/java/datafu/test/pig/PigTests.java |   9 +-
 .../java/datafu/test/pig/macros/MacroTests.java | 102 +++++++++++++++++++
 3 files changed, 137 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/5b648e2f/datafu-pig/src/main/resources/datafu/count_macros.pig
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/resources/datafu/count_macros.pig b/datafu-pig/src/main/resources/datafu/count_macros.pig
new file mode 100644
index 0000000..9bebce4
--- /dev/null
+++ b/datafu-pig/src/main/resources/datafu/count_macros.pig
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DEFINE count_all_non_distinct(alias) returns res {
+  grp_all = GROUP $alias ALL;
+  $res = FOREACH grp_all GENERATE COUNT($alias);
+};
+
+DEFINE count_distinct_keys(alias, key) returns res {
+  just_key = FOREACH $alias GENERATE $key;
+  dist_data = DISTINCT just_key;
+  $res = count_all_non_distinct(dist_data);
+};

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/5b648e2f/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/PigTests.java b/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
index 6fa500d..b869492 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
@@ -54,6 +54,8 @@ public abstract class PigTests
     Logger.getRootLogger().removeAllAppenders();
     Logger.getLogger(JvmMetrics.class).setLevel(Level.OFF);
     
+    System.setProperty("pig.import.search.path", System.getProperty("user.dir") + File.separator + "src" + File.separator + "main" + File.separator + "resources");
+
     // Test files will be created in the following sub-directory
     new File(System.getProperty("user.dir") + File.separator + "build", "test-files").mkdir();		
   }
@@ -233,14 +235,15 @@ public abstract class PigTests
   protected void assertOutput(PigTest test, String alias, String... expected) throws IOException, ParseException
   {
     List<Tuple> tuples = getLinesForAlias(test, alias);
-    assertEquals(expected.length, tuples.size());
+    assertEquals(expected.length, tuples.size(), "Mismatch in number of tuples");
     int i=0;
     for (String e : expected)
     {
-      assertEquals(tuples.get(i++).toString(), e);
+      String actual = tuples.get(i++).toString();
+      assertEquals(actual, e, "Expected " + e + " but found " + actual);
     }
   }
-  
+
   protected File deleteIfExists(File file)
   {
     if (file.exists())

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/5b648e2f/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java b/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java
new file mode 100644
index 0000000..17d0af5
--- /dev/null
+++ b/datafu-pig/src/test/java/datafu/test/pig/macros/MacroTests.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package datafu.test.pig.macros;
+
+import org.adrianwalker.multilinestring.Multiline;
+import org.apache.pig.pigunit.PigTest;
+import org.testng.annotations.Test;
+
+import datafu.test.pig.PigTests;
+
+public class MacroTests extends PigTests
+{
+  /**
+
+  import 'datafu/count_macros.pig';
+
+  data = LOAD 'input' AS (id:chararray, num:int);
+
+  cnt = count_distinct_keys(data, 'id');
+
+  STORE cnt INTO 'output';
+
+   */
+  @Multiline
+  private String countDistinctTest;
+
+  @Test
+  public void countDistinctTest() throws Exception
+  {
+    PigTest test = createPigTestFromString(countDistinctTest);
+
+    writeLinesToFile("input",
+                     "A1\t1","A1\t4","A1\t4","A1\t4",
+                     "A2\t4","A2\t4",
+                     "A3\t3","A3\t1","A3\t77",
+                     "A4\t3","A4\t3","A4\t59","A4\t29",
+                     "A5\t4",
+                     "A6\t3","A6\t55","A6\t1",
+                     "A7\t39","A7\t27","A7\t85",
+                     "A8\t4","A8\t45",
+                     "A9\t92", "A9\t42","A9\t1","A9\t0",
+                     "A10\t7","A10\t23","A10\t1","A10\t41","A10\t52");
+
+    test.runScript();
+
+    assertOutput(test, "cnt", "(10)");
+  }
+
+  /**
+
+  import 'datafu/count_macros.pig';
+
+  data = LOAD 'input' AS (id:chararray, num:int);
+
+  cnt = count_all_non_distinct(data);
+
+  STORE cnt INTO 'output';
+
+   */
+  @Multiline
+  private String countTest;
+
+  @Test
+  public void countTest() throws Exception
+  {
+    PigTest test = createPigTestFromString(countTest);
+
+    writeLinesToFile("input",
+                     "A1\t1","A1\t4","A1\t4","A1\t4",
+                     "A2\t4","A2\t4",
+                     "A3\t3","A3\t1","A3\t77",
+                     "A4\t3","A4\t3","A4\t59","A4\t29",
+                     "A5\t4",
+                     "A6\t3","A6\t55","A6\t1",
+                     "A7\t39","A7\t27","A7\t85",
+                     "A8\t4","A8\t45",
+                     "A9\t92", "A9\t42","A9\t1","A9\t0",
+                     "A10\t7","A10\t23","A10\t1","A10\t41","A10\t52");
+
+    test.runScript();
+
+    assertOutput(test, "cnt", "(31)");
+  }
+
+}