You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/01/26 22:36:41 UTC

incubator-systemml git commit: [SYSTEMML-158] Update deprecated Hadoop properties

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 401e982bf -> e1a2685ad


[SYSTEMML-158] Update deprecated Hadoop properties

Removes the console warnings from deprecated Hadoop properties.
Specify Hadoop v1/v2 properties in MRConfigurationNames.
Update io.sort.mb to mapreduce.task.io.sort.mb
Update mapred.job.tracker to mapreduce.jobtracker.address
Update mapred.task.tracker.task-controller to mapreduce.tasktracker.taskcontroller
Update mapred.local.dir to mapreduce.cluster.local.dir
Update mapred.system.dir to mapreduce.jobtracker.system.dir

Closes #50.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e1a2685a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e1a2685a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e1a2685a

Branch: refs/heads/master
Commit: e1a2685ad11c4ca0dcc1cf3bfeb8b74f8c15d1ab
Parents: 401e982
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Tue Jan 26 13:33:13 2016 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Tue Jan 26 13:33:13 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/sysml/api/DMLScript.java    | 15 +++--
 .../controlprogram/parfor/RemoteParForMR.java   |  4 +-
 .../parfor/stat/InfrastructureAnalyzer.java     |  8 +--
 .../runtime/matrix/mapred/MMCJMRCache.java      |  2 +-
 .../matrix/mapred/MRConfigurationNames.java     | 67 ++++++++++----------
 .../matrix/mapred/MRJobConfiguration.java       | 10 +--
 .../sysml/yarn/ropt/YarnClusterAnalyzer.java    |  4 +-
 7 files changed, 59 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java
index 0302fdc..ce37bea 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -834,9 +834,9 @@ public class DMLScript
 		//analyze hadoop configuration
 		JobConf job = ConfigurationManager.getCachedJobConf();
 		boolean localMode     = InfrastructureAnalyzer.isLocalMode(job);
-		String taskController = job.get("mapred.task.tracker.task-controller", "org.apache.hadoop.mapred.DefaultTaskController");
+		String taskController = job.get(MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER, "org.apache.hadoop.mapred.DefaultTaskController");
 		String ttGroupName    = job.get("mapreduce.tasktracker.group","null");
-		String perm           = job.get(MRConfigurationNames.DFS_PERMISSIONS,"null"); //note: job.get("dfs.permissions.supergroup",null);
+		String perm           = job.get(MRConfigurationNames.DFS_PERMISSIONS_ENABLED,"null"); //note: job.get("dfs.permissions.supergroup",null);
 		URI fsURI             = FileSystem.getDefaultUri(job);
 
 		//determine security states
@@ -846,9 +846,14 @@ public class DMLScript
 		boolean flagLocalFS = fsURI==null || fsURI.getScheme().equals("file");
 		boolean flagSecurity = perm.equals("yes"); 
 		
-		LOG.debug("SystemML security check: " + "local.user.name = " + userName + ", " + "local.user.groups = " + ProgramConverter.serializeStringCollection(groupNames) + ", "
-				        + "mapred.job.tracker = " + job.get("mapred.job.tracker") + ", " + "mapred.task.tracker.task-controller = " + taskController + "," + "mapreduce.tasktracker.group = " + ttGroupName + ", "
-				        + "fs.default.name = " + ((fsURI!=null)?fsURI.getScheme():"null") + ", " + MRConfigurationNames.DFS_PERMISSIONS+" = " + perm );
+		LOG.debug("SystemML security check: "
+				+ "local.user.name = " + userName + ", "
+				+ "local.user.groups = " + ProgramConverter.serializeStringCollection(groupNames) + ", "
+				+ MRConfigurationNames.MR_JOBTRACKER_ADDRESS + " = " + job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS) + ", "
+				+ MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER + " = " + taskController + ","
+				+ "mapreduce.tasktracker.group = " + ttGroupName + ", "
+				+ "fs.default.name = " + ((fsURI!=null) ? fsURI.getScheme() : "null") + ", "
+				+ MRConfigurationNames.DFS_PERMISSIONS_ENABLED + " = " + perm );
 
 		//print warning if permission issues possible
 		if( flagDiffUser && ( flagLocalFS || flagSecurity ) )

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
index 1bdccfc..a17acb8 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
@@ -37,7 +37,6 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.lib.NLineInputFormat;
-
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
@@ -52,6 +51,7 @@ import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
 import org.apache.sysml.runtime.instructions.cp.Data;
 import org.apache.sysml.runtime.io.MatrixReader;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
 import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
 import org.apache.sysml.runtime.util.MapReduceTool;
 import org.apache.sysml.utils.Statistics;
@@ -182,7 +182,7 @@ public class RemoteParForMR
 				job.setNumTasksToExecutePerJvm(-1); //unlimited
 			
 			//set sort io buffer (reduce unnecessary large io buffer, guaranteed memory consumption)
-			job.setInt("io.sort.mb", 8); //8MB
+			job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8); //8MB
 			
 			//set the replication factor for the results
 			job.setInt("dfs.replication", replication);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
index 05cfa17..23fd697 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
@@ -265,7 +265,7 @@ public class InfrastructureAnalyzer
 		// Due to a bug in HDP related to fetching the "mode" of execution within mappers,
 		// we explicitly probe the relevant properties instead of relying on results from 
 		// analyzeHadoopCluster().
-		String jobTracker = job.get("mapred.job.tracker", "local");
+		String jobTracker = job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS, "local");
 		String framework = job.get("mapreduce.framework.name", "local");
 		boolean isYarnEnabled = (framework!=null && framework.equals("yarn"));
 		
@@ -509,7 +509,7 @@ public class InfrastructureAnalyzer
 	{
 		JobConf job = ConfigurationManager.getCachedJobConf();
 		
-		_remoteMRSortMem = (1024*1024) * job.getLong("io.sort.mb",100); //1MB
+		_remoteMRSortMem = (1024*1024) * job.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB,100); //1MB
 			
 		//handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
 		//(for robustness we probe both: child and map configuration parameters)
@@ -526,7 +526,7 @@ public class InfrastructureAnalyzer
 			_remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);
 		
 		//HDFS blocksize
-		String blocksize = job.get(MRConfigurationNames.DFS_BLOCK_SIZE, "134217728");
+		String blocksize = job.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728");
 		_blocksize = Long.parseLong(blocksize);
 		
 		//is yarn enabled
@@ -546,7 +546,7 @@ public class InfrastructureAnalyzer
 	{
 		//analyze if local mode (if yarn enabled, we always assume cluster mode
 		//in order to workaround configuration issues on >=Hadoop 2.6)
-		String jobTracker = job.get("mapred.job.tracker", "local");
+		String jobTracker = job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS, "local");
 		return "local".equals(jobTracker)
 			   & !isYarnEnabled();
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
index 27c9bc7..0dd0c39 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
@@ -90,7 +90,7 @@ public abstract class MMCJMRCache
 	protected void constructLocalFilePrefix(String fname)
 	{
 		//get random localdir (to spread load across available disks)
-		String[] localDirs = _job.get("mapred.local.dir").split(",");
+		String[] localDirs = _job.get(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR).split(",");
 		Random rand = new Random();
 		int randPos = rand.nextInt(localDirs.length);
 				

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
index e77b83b..cd6e781 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
@@ -17,54 +17,57 @@
  * under the License.
  */
 
-
 package org.apache.sysml.runtime.matrix.mapred;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.VersionInfo;
 
-
 /**
- * This class provides a central local for used hadoop configuration properties.
- * For portability, we support both hadoop 1.x and 2.x and automatically map to 
- * the currently used cluster.
+ * This class provides a central local for used hadoop configuration properties. For portability, we support both hadoop
+ * 1.x and 2.x and automatically map to the currently used cluster.
  * 
  */
-public abstract class MRConfigurationNames 
-{
+public abstract class MRConfigurationNames {
 
-	
 	protected static final Log LOG = LogFactory.getLog(MRConfigurationNames.class.getName());
-	
-	//name definitions
-	public static final String INVALID = "null";
-	public static String DFS_SESSION_ID = INVALID;
-	public static String DFS_BLOCK_SIZE = INVALID;
-	public static String DFS_PERMISSIONS = INVALID;
 
-	//initialize to used cluster 
-	static{
-		
-		//determine hadoop version
-		//e.g., 2.0.4-alpha from 0a11e32419bd4070f28c6d96db66c2abe9fd6d91 by jenkins source checksum f3c1bf36ae3aa5a6f6d3447fcfadbba
+	public static final String DFS_BLOCKSIZE;
+	public static final String DFS_METRICS_SESSION_ID;
+	public static final String DFS_PERMISSIONS_ENABLED;
+	public static final String MR_CLUSTER_LOCAL_DIR;
+	public static final String MR_JOBTRACKER_ADDRESS;
+	public static final String MR_JOBTRACKER_SYSTEM_DIR;
+	public static final String MR_TASK_IO_SORT_MB;
+	public static final String MR_TASKTRACKER_TASKCONTROLLER;
+
+	// initialize to currently used cluster
+	static {
+		// determine hadoop version
 		String version = VersionInfo.getBuildVersion();
 		boolean hadoopVersion2 = version.startsWith("2");
-		LOG.debug("Hadoop build version: "+version);
-		
-		if( hadoopVersion2 )
-		{
+		LOG.debug("Hadoop build version: " + version);
+
+		if (hadoopVersion2) {
 			LOG.debug("Using hadoop 2.x configuration properties.");
-			DFS_SESSION_ID  = "dfs.metrics.session-id";
-			DFS_BLOCK_SIZE  = "dfs.blocksize";
-			DFS_PERMISSIONS = "dfs.permissions.enabled";
-		}
-		else //any older version
-		{
+			DFS_BLOCKSIZE = "dfs.blocksize";
+			DFS_METRICS_SESSION_ID = "dfs.metrics.session-id";
+			DFS_PERMISSIONS_ENABLED = "dfs.permissions.enabled";
+			MR_CLUSTER_LOCAL_DIR = "mapreduce.cluster.local.dir";
+			MR_JOBTRACKER_ADDRESS = "mapreduce.jobtracker.address";
+			MR_JOBTRACKER_SYSTEM_DIR = "mapreduce.jobtracker.system.dir";
+			MR_TASK_IO_SORT_MB = "mapreduce.task.io.sort.mb";
+			MR_TASKTRACKER_TASKCONTROLLER = "mapreduce.tasktracker.taskcontroller";
+		} else { // any older version
 			LOG.debug("Using hadoop 1.x configuration properties.");
-			DFS_SESSION_ID  = "session.id";
-			DFS_BLOCK_SIZE  = "dfs.block.size";
-			DFS_PERMISSIONS = "dfs.permissions";
+			DFS_BLOCKSIZE = "dfs.block.size";
+			DFS_METRICS_SESSION_ID = "session.id";
+			DFS_PERMISSIONS_ENABLED = "dfs.permissions";
+			MR_CLUSTER_LOCAL_DIR = "mapred.local.dir";
+			MR_JOBTRACKER_ADDRESS = "mapred.job.tracker";
+			MR_JOBTRACKER_SYSTEM_DIR = "mapred.system.dir";
+			MR_TASK_IO_SORT_MB = "io.sort.mb";
+			MR_TASKTRACKER_TASKCONTROLLER = "mapred.task.tracker.task-controller";
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
index 3382a0c..6d30b5e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
@@ -424,7 +424,7 @@ public class MRJobConfiguration
 			String uniqueSubdir = tmp.toString();
 			
 			//unique local dir
-			String[] dirlist = job.get("mapred.local.dir","/tmp").split(",");
+			String[] dirlist = job.get(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR,"/tmp").split(",");
 			StringBuilder sb2 = new StringBuilder();
 			for( String dir : dirlist ) {
 				if( sb2.length()>0 )
@@ -432,10 +432,10 @@ public class MRJobConfiguration
 				sb2.append(dir);
 				sb2.append( uniqueSubdir );
 			}
-			job.set("mapred.local.dir", sb2.toString() );			
+			job.set(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR, sb2.toString() );
 			
 			//unique system dir 
-			job.set("mapred.system.dir", job.get("mapred.system.dir") + uniqueSubdir);
+			job.set(MRConfigurationNames.MR_JOBTRACKER_SYSTEM_DIR, job.get(MRConfigurationNames.MR_JOBTRACKER_SYSTEM_DIR) + uniqueSubdir);
 			
 			//unique staging dir 
 			job.set( "mapreduce.jobtracker.staging.root.dir",  job.get("mapreduce.jobtracker.staging.root.dir") + uniqueSubdir );
@@ -444,12 +444,12 @@ public class MRJobConfiguration
 	
 	public static String getLocalWorkingDirPrefix(JobConf job)
 	{
-		return job.get("mapred.local.dir");
+		return job.get(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR);
 	}
 	
 	public static String getSystemWorkingDirPrefix(JobConf job)
 	{
-		return job.get("mapred.system.dir");
+		return job.get(MRConfigurationNames.MR_JOBTRACKER_SYSTEM_DIR);
 	}
 	
 	public static String getStagingWorkingDirPrefix(JobConf job)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java b/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
index af15807..18a2dfc 100644
--- a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
+++ b/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
@@ -676,7 +676,7 @@ public class YarnClusterAnalyzer
 				throw new YarnException("There are no available nodes in the yarn cluster");
 			
 			// Now get the default cluster settings
-			_remoteMRSortMem = (1024*1024) * conf.getLong("io.sort.mb",100); //100MB
+			_remoteMRSortMem = (1024*1024) * conf.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB,100); //100MB
 
 			//handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
 			//(for robustness we probe both: child and map configuration parameters)
@@ -693,7 +693,7 @@ public class YarnClusterAnalyzer
 				_remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);
 			
 			//HDFS blocksize
-			String blocksize = conf.get(MRConfigurationNames.DFS_BLOCK_SIZE, "134217728");
+			String blocksize = conf.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728");
 			_blocksize = Long.parseLong(blocksize);
 			
 			minimalPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,