You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@crunch.apache.org by jw...@apache.org on 2013/12/12 04:29:29 UTC

[1/2] git commit: Trivial Scrunch test fix for Hadoop2

Updated Branches:
  refs/heads/master 6e6234138 -> e5a360512


Trivial Scrunch test fix for Hadoop2


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/8932f2a2
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/8932f2a2
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/8932f2a2

Branch: refs/heads/master
Commit: 8932f2a2022d05ad913bf0d0fcebed27e945ea31
Parents: 6e62341
Author: Josh Wills <jw...@apache.org>
Authored: Wed Dec 11 14:37:54 2013 -0800
Committer: Josh Wills <jw...@apache.org>
Committed: Wed Dec 11 19:26:39 2013 -0800

----------------------------------------------------------------------
 .../src/it/scala/org/apache/crunch/scrunch/DeepCopyTest.scala  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/8932f2a2/crunch-scrunch/src/it/scala/org/apache/crunch/scrunch/DeepCopyTest.scala
----------------------------------------------------------------------
diff --git a/crunch-scrunch/src/it/scala/org/apache/crunch/scrunch/DeepCopyTest.scala b/crunch-scrunch/src/it/scala/org/apache/crunch/scrunch/DeepCopyTest.scala
index 441a9c6..f30965a 100644
--- a/crunch-scrunch/src/it/scala/org/apache/crunch/scrunch/DeepCopyTest.scala
+++ b/crunch-scrunch/src/it/scala/org/apache/crunch/scrunch/DeepCopyTest.scala
@@ -59,11 +59,11 @@ class DeepCopyTest extends CrunchSuite {
       .toList
 
     // Expected results vs. actual
-    val e12 = Seq((Rec2(1, "a", 0.4), Rec3("a", 4)), (Rec2(1, "a", 0.5), Rec3("a", 4)), (Rec2(1, "b", 0.6), Rec3("b", 5)),
+    val e12 = Set((Rec2(1, "a", 0.4), Rec3("a", 4)), (Rec2(1, "a", 0.5), Rec3("a", 4)), (Rec2(1, "b", 0.6), Rec3("b", 5)),
         (Rec2(1, "b", 0.7), Rec3("b", 5)))
-    val e22 = Seq((Rec2(2, "c", 9.9),Rec3("c", 6)))
+    val e22 = Set((Rec2(2, "c", 9.9),Rec3("c", 6)))
     assertEquals(2, res.size)
-    assertEquals(res.map(_._2.toList), Seq(e12, e22))
+    assertEquals(res.map(_._2.toSet), Seq(e12, e22))
     pipe.done()
   }

[2/2] git commit: Get spark distributed cache working on hadoop2

Posted by jw...@apache.org.

Get spark distributed cache working on hadoop2


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/e5a36051
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/e5a36051
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/e5a36051

Branch: refs/heads/master
Commit: e5a360512c61d368358130b4a37ef7fa19f6a4ab
Parents: 8932f2a
Author: Josh Wills <jw...@apache.org>
Authored: Wed Dec 11 16:30:23 2013 -0800
Committer: Josh Wills <jw...@apache.org>
Committed: Wed Dec 11 19:26:54 2013 -0800

----------------------------------------------------------------------
 .../java/org/apache/crunch/impl/spark/SparkRuntimeContext.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/e5a36051/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
----------------------------------------------------------------------
diff --git a/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java b/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
index 92336a3..78436c2 100644
--- a/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
+++ b/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
@@ -76,9 +76,13 @@ public class SparkRuntimeContext implements Serializable {
         List<String> allFiles = Lists.newArrayList();
         for (URI uri : uris) {
           File f = new File(uri.getPath());
+          String sparkFile = SparkFiles.get(f.getName());
           allFiles.add(SparkFiles.get(f.getName()));
         }
-        DistributedCache.setLocalFiles(getConfiguration(), Joiner.on(',').join(allFiles));
+        String sparkFiles = Joiner.on(',').join(allFiles);
+        // Hacking this for Hadoop1 and Hadoop2
+        getConfiguration().set("mapreduce.job.cache.local.files", sparkFiles);
+        getConfiguration().set("mapred.cache.localFiles", sparkFiles);
       }
     } catch (IOException e) {
       throw new CrunchRuntimeException(e);