You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by mk...@apache.org on 2016/01/22 03:47:21 UTC

crunch git commit: Make replication factor for DistCache configurable

Repository: crunch
Updated Branches:
  refs/heads/master e59cb17a4 -> 1e6224594


Make replication factor for DistCache configurable

Add configuration option to set the replication factor for files
distributed with the DistCache helper class.

Signed-off-by: mkwhitacre <mk...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/1e622459
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/1e622459
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/1e622459

Branch: refs/heads/master
Commit: 1e62245947db91ffbb6e6f4ec83940c2b104b245
Parents: e59cb17
Author: Steffen Grohsschmiedt <st...@spotify.com>
Authored: Thu Jan 21 10:42:07 2016 +0100
Committer: mkwhitacre <mk...@gmail.com>
Committed: Thu Jan 21 20:32:02 2016 -0600

----------------------------------------------------------------------
 .../src/main/java/org/apache/crunch/util/DistCache.java  | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/1e622459/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java b/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
index 0325e12..046f038 100644
--- a/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
+++ b/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
@@ -46,12 +46,21 @@ import org.apache.hadoop.fs.Path;
  */
 public class DistCache {
 
+  /**
+   * Configuration key for setting the replication factor for files distributed using the Crunch
+   * DistCache helper class. This can be used to scale read access for files used by the Crunch
+   * framework.
+   */
+  public static final String DIST_CACHE_REPLICATION = "crunch.distcache.replication";
+
   // Configuration key holding the paths of jars to export to the distributed
   // cache.
   private static final String TMPJARS_KEY = "tmpjars";
 
   public static void write(Configuration conf, Path path, Object value) throws IOException {
-    ObjectOutputStream oos = new ObjectOutputStream(path.getFileSystem(conf).create(path));
+    FileSystem fs = path.getFileSystem(conf);
+    short replication = (short) conf.getInt(DIST_CACHE_REPLICATION, fs.getDefaultReplication(path));
+    ObjectOutputStream oos = new ObjectOutputStream(fs.create(path, replication));
     oos.writeObject(value);
     oos.close();