You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by jw...@apache.org on 2013/12/12 04:29:30 UTC
[2/2] git commit: Get spark distributed cache working on hadoop2
Get spark distributed cache working on hadoop2
Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/e5a36051
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/e5a36051
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/e5a36051
Branch: refs/heads/master
Commit: e5a360512c61d368358130b4a37ef7fa19f6a4ab
Parents: 8932f2a
Author: Josh Wills <jw...@apache.org>
Authored: Wed Dec 11 16:30:23 2013 -0800
Committer: Josh Wills <jw...@apache.org>
Committed: Wed Dec 11 19:26:54 2013 -0800
----------------------------------------------------------------------
.../java/org/apache/crunch/impl/spark/SparkRuntimeContext.java | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/crunch/blob/e5a36051/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
----------------------------------------------------------------------
diff --git a/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java b/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
index 92336a3..78436c2 100644
--- a/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
+++ b/crunch-spark/src/main/java/org/apache/crunch/impl/spark/SparkRuntimeContext.java
@@ -76,9 +76,13 @@ public class SparkRuntimeContext implements Serializable {
List<String> allFiles = Lists.newArrayList();
for (URI uri : uris) {
File f = new File(uri.getPath());
+ String sparkFile = SparkFiles.get(f.getName());
allFiles.add(SparkFiles.get(f.getName()));
}
- DistributedCache.setLocalFiles(getConfiguration(), Joiner.on(',').join(allFiles));
+ String sparkFiles = Joiner.on(',').join(allFiles);
+ // Workaround: set the local cache file list under both property names so it is picked up on Hadoop 1 and Hadoop 2
+ getConfiguration().set("mapreduce.job.cache.local.files", sparkFiles);
+ getConfiguration().set("mapred.cache.localFiles", sparkFiles);
}
} catch (IOException e) {
throw new CrunchRuntimeException(e);