You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/15 03:05:48 UTC

[3/3] systemml git commit: [SYSTEMML-2326] Fix distributed cache umask issues in local mode

[SYSTEMML-2326] Fix distributed cache umask issues in local mode 

This patch fixes test issues in local mode for servers with restrictive
umask settings. In such environments the MR distributed cache does not
allow the addition of "hdfs" file (which reside in local file systems
with restricted permissions) to the public cache. We now simple do not
add files to distributed cache in local mode because all relevant jobs
anyway have fallbacks to read from local files in local mode.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/31580f52
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/31580f52
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/31580f52

Branch: refs/heads/master
Commit: 31580f52d318fda5d9ec34297cd6cb4a401b83df
Parents: 9426001
Author: Matthias Boehm <mb...@gmail.com>
Authored: Mon May 14 20:07:09 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Mon May 14 20:07:09 2018 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/CSVReblockMR.java      |  7 ++++--
 .../org/apache/sysml/runtime/matrix/SortMR.java | 23 ++++++++++----------
 .../matrix/mapred/MRJobConfiguration.java       | 19 ++++++++--------
 3 files changed, 26 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/31580f52/src/main/java/org/apache/sysml/runtime/matrix/CSVReblockMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/CSVReblockMR.java b/src/main/java/org/apache/sysml/runtime/matrix/CSVReblockMR.java
index 89c1bee..97379dd 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/CSVReblockMR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/CSVReblockMR.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.instructions.MRJobInstruction;
 import org.apache.sysml.runtime.matrix.data.InputInfo;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -442,8 +443,10 @@ public class CSVReblockMR
 		//set unique working dir
 		MRJobConfiguration.setUniqueWorkingDir(job);
 		Path cachefile=new Path(counterFile, "part-00000");
-		DistributedCache.addCacheFile(cachefile.toUri(), job);
-		DistributedCache.createSymlink(job);
+		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
+			DistributedCache.addCacheFile(cachefile.toUri(), job);
+			DistributedCache.createSymlink(job);
+		}
 		job.set(ROWID_FILE_NAME, cachefile.toString());
 		
 		RunningJob runjob=JobClient.runJob(job);

http://git-wip-us.apache.org/repos/asf/systemml/blob/31580f52/src/main/java/org/apache/sysml/runtime/matrix/SortMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/SortMR.java b/src/main/java/org/apache/sysml/runtime/matrix/SortMR.java
index d8b7aed..bbd0ef3 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/SortMR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/SortMR.java
@@ -239,17 +239,18 @@ public class SortMR
 		    job.setReducerClass(ValueSortReducer.class);	
 		    job.setOutputKeyClass(outputInfo.outputKeyClass); //double
 		    job.setOutputValueClass(outputInfo.outputValueClass); //int
-	    }
-	    job.setPartitionerClass(TotalOrderPartitioner.class);
-	    
-	    
-	    //setup distributed cache
-	    DistributedCache.addCacheFile(partitionUri, job);
-	    DistributedCache.createSymlink(job);
-	    
-	    //setup replication factor
-	    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
-	    
+		}
+		job.setPartitionerClass(TotalOrderPartitioner.class);
+
+		//setup distributed cache
+		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
+			DistributedCache.addCacheFile(partitionUri, job);
+			DistributedCache.createSymlink(job);
+		}
+
+		//setup replication factor
+		job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
+
 		//set up custom map/reduce configurations 
 		DMLConfig config = ConfigurationManager.getDMLConfig();
 		MRJobConfiguration.setupCustomMRConfigurations(job, config);

http://git-wip-us.apache.org/repos/asf/systemml/blob/31580f52/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
index 1db292f..7efefbd 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
@@ -914,11 +914,13 @@ public class MRJobConfiguration
 		job.set(DISTCACHE_INPUT_PATHS, pathsString);
 		Path p = null;
 		
-		for(String spath : paths) {
-			p = new Path(spath);
-			
-			DistributedCache.addCacheFile(p.toUri(), job);
-			DistributedCache.createSymlink(job);
+		if( !InfrastructureAnalyzer.isLocalMode(job) ) {
+			for(String spath : paths) {
+				p = new Path(spath);
+				
+				DistributedCache.addCacheFile(p.toUri(), job);
+				DistributedCache.createSymlink(job);
+			}
 		}
 	}
 	
@@ -1132,16 +1134,13 @@ public class MRJobConfiguration
 				outputInfos, inBlockRepresentation, false);
 	}
 
-	public static String setUpSortPartitionFilename( JobConf job ) 
-	{
+	public static String setUpSortPartitionFilename( JobConf job ) {
 		String pfname = constructPartitionFilename();
 		job.set( SORT_PARTITION_FILENAME, pfname );
-		
 		return pfname;
 	}
 
-	public static String getSortPartitionFilename( JobConf job )
-	{
+	public static String getSortPartitionFilename( JobConf job ) {
 		return job.get( SORT_PARTITION_FILENAME );
 	}