You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/01/26 22:36:41 UTC
incubator-systemml git commit: [SYSTEMML-158] Update deprecated
Hadoop properties
Repository: incubator-systemml
Updated Branches:
refs/heads/master 401e982bf -> e1a2685ad
[SYSTEMML-158] Update deprecated Hadoop properties
Removes the console warnings from deprecated Hadoop properties.
Specify Hadoop v1/v2 properties in MRConfigurationName.
Update io.sort.mb to mapreduce.task.io.sort.mb
Update mapred.job.tracker to mapreduce.jobtracker.address
Update mapred.task.tracker.task-controller to mapreduce.tasktracker.taskcontroller
Update mapred.local.dir to mapreduce.cluster.local.dir
Update mapred.system.dir to mapreduce.jobtracker.system.dir
Closes #50.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e1a2685a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e1a2685a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e1a2685a
Branch: refs/heads/master
Commit: e1a2685ad11c4ca0dcc1cf3bfeb8b74f8c15d1ab
Parents: 401e982
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Tue Jan 26 13:33:13 2016 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Tue Jan 26 13:33:13 2016 -0800
----------------------------------------------------------------------
.../java/org/apache/sysml/api/DMLScript.java | 15 +++--
.../controlprogram/parfor/RemoteParForMR.java | 4 +-
.../parfor/stat/InfrastructureAnalyzer.java | 8 +--
.../runtime/matrix/mapred/MMCJMRCache.java | 2 +-
.../matrix/mapred/MRConfigurationNames.java | 67 ++++++++++----------
.../matrix/mapred/MRJobConfiguration.java | 10 +--
.../sysml/yarn/ropt/YarnClusterAnalyzer.java | 4 +-
7 files changed, 59 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java
index 0302fdc..ce37bea 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -834,9 +834,9 @@ public class DMLScript
//analyze hadoop configuration
JobConf job = ConfigurationManager.getCachedJobConf();
boolean localMode = InfrastructureAnalyzer.isLocalMode(job);
- String taskController = job.get("mapred.task.tracker.task-controller", "org.apache.hadoop.mapred.DefaultTaskController");
+ String taskController = job.get(MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER, "org.apache.hadoop.mapred.DefaultTaskController");
String ttGroupName = job.get("mapreduce.tasktracker.group","null");
- String perm = job.get(MRConfigurationNames.DFS_PERMISSIONS,"null"); //note: job.get("dfs.permissions.supergroup",null);
+ String perm = job.get(MRConfigurationNames.DFS_PERMISSIONS_ENABLED,"null"); //note: job.get("dfs.permissions.supergroup",null);
URI fsURI = FileSystem.getDefaultUri(job);
//determine security states
@@ -846,9 +846,14 @@ public class DMLScript
boolean flagLocalFS = fsURI==null || fsURI.getScheme().equals("file");
boolean flagSecurity = perm.equals("yes");
- LOG.debug("SystemML security check: " + "local.user.name = " + userName + ", " + "local.user.groups = " + ProgramConverter.serializeStringCollection(groupNames) + ", "
- + "mapred.job.tracker = " + job.get("mapred.job.tracker") + ", " + "mapred.task.tracker.task-controller = " + taskController + "," + "mapreduce.tasktracker.group = " + ttGroupName + ", "
- + "fs.default.name = " + ((fsURI!=null)?fsURI.getScheme():"null") + ", " + MRConfigurationNames.DFS_PERMISSIONS+" = " + perm );
+ LOG.debug("SystemML security check: "
+ + "local.user.name = " + userName + ", "
+ + "local.user.groups = " + ProgramConverter.serializeStringCollection(groupNames) + ", "
+ + MRConfigurationNames.MR_JOBTRACKER_ADDRESS + " = " + job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS) + ", "
+ + MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER + " = " + taskController + ","
+ + "mapreduce.tasktracker.group = " + ttGroupName + ", "
+ + "fs.default.name = " + ((fsURI!=null) ? fsURI.getScheme() : "null") + ", "
+ + MRConfigurationNames.DFS_PERMISSIONS_ENABLED + " = " + perm );
//print warning if permission issues possible
if( flagDiffUser && ( flagLocalFS || flagSecurity ) )
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
index 1bdccfc..a17acb8 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteParForMR.java
@@ -37,7 +37,6 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
-
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
@@ -52,6 +51,7 @@ import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.utils.Statistics;
@@ -182,7 +182,7 @@ public class RemoteParForMR
job.setNumTasksToExecutePerJvm(-1); //unlimited
//set sort io buffer (reduce unnecessary large io buffer, guaranteed memory consumption)
- job.setInt("io.sort.mb", 8); //8MB
+ job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8); //8MB
//set the replication factor for the results
job.setInt("dfs.replication", replication);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
index 05cfa17..23fd697 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
@@ -265,7 +265,7 @@ public class InfrastructureAnalyzer
// Due to a bug in HDP related to fetching the "mode" of execution within mappers,
// we explicitly probe the relevant properties instead of relying on results from
// analyzeHadoopCluster().
- String jobTracker = job.get("mapred.job.tracker", "local");
+ String jobTracker = job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS, "local");
String framework = job.get("mapreduce.framework.name", "local");
boolean isYarnEnabled = (framework!=null && framework.equals("yarn"));
@@ -509,7 +509,7 @@ public class InfrastructureAnalyzer
{
JobConf job = ConfigurationManager.getCachedJobConf();
- _remoteMRSortMem = (1024*1024) * job.getLong("io.sort.mb",100); //1MB
+ _remoteMRSortMem = (1024*1024) * job.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB,100); //1MB
//handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
//(for robustness we probe both: child and map configuration parameters)
@@ -526,7 +526,7 @@ public class InfrastructureAnalyzer
_remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);
//HDFS blocksize
- String blocksize = job.get(MRConfigurationNames.DFS_BLOCK_SIZE, "134217728");
+ String blocksize = job.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728");
_blocksize = Long.parseLong(blocksize);
//is yarn enabled
@@ -546,7 +546,7 @@ public class InfrastructureAnalyzer
{
//analyze if local mode (if yarn enabled, we always assume cluster mode
//in order to workaround configuration issues on >=Hadoop 2.6)
- String jobTracker = job.get("mapred.job.tracker", "local");
+ String jobTracker = job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS, "local");
return "local".equals(jobTracker)
& !isYarnEnabled();
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
index 27c9bc7..0dd0c39 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MMCJMRCache.java
@@ -90,7 +90,7 @@ public abstract class MMCJMRCache
protected void constructLocalFilePrefix(String fname)
{
//get random localdir (to spread load across available disks)
- String[] localDirs = _job.get("mapred.local.dir").split(",");
+ String[] localDirs = _job.get(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR).split(",");
Random rand = new Random();
int randPos = rand.nextInt(localDirs.length);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
index e77b83b..cd6e781 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
@@ -17,54 +17,57 @@
* under the License.
*/
-
package org.apache.sysml.runtime.matrix.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.VersionInfo;
-
/**
- * This class provides a central local for used hadoop configuration properties.
- * For portability, we support both hadoop 1.x and 2.x and automatically map to
- * the currently used cluster.
+ * This class provides a central local for used hadoop configuration properties. For portability, we support both hadoop
+ * 1.x and 2.x and automatically map to the currently used cluster.
*
*/
-public abstract class MRConfigurationNames
-{
+public abstract class MRConfigurationNames {
-
protected static final Log LOG = LogFactory.getLog(MRConfigurationNames.class.getName());
-
- //name definitions
- public static final String INVALID = "null";
- public static String DFS_SESSION_ID = INVALID;
- public static String DFS_BLOCK_SIZE = INVALID;
- public static String DFS_PERMISSIONS = INVALID;
- //initialize to used cluster
- static{
-
- //determine hadoop version
- //e.g., 2.0.4-alpha from 0a11e32419bd4070f28c6d96db66c2abe9fd6d91 by jenkins source checksum f3c1bf36ae3aa5a6f6d3447fcfadbba
+ public static final String DFS_BLOCKSIZE;
+ public static final String DFS_METRICS_SESSION_ID;
+ public static final String DFS_PERMISSIONS_ENABLED;
+ public static final String MR_CLUSTER_LOCAL_DIR;
+ public static final String MR_JOBTRACKER_ADDRESS;
+ public static final String MR_JOBTRACKER_SYSTEM_DIR;
+ public static final String MR_TASK_IO_SORT_MB;
+ public static final String MR_TASKTRACKER_TASKCONTROLLER;
+
+ // initialize to currently used cluster
+ static {
+ // determine hadoop version
String version = VersionInfo.getBuildVersion();
boolean hadoopVersion2 = version.startsWith("2");
- LOG.debug("Hadoop build version: "+version);
-
- if( hadoopVersion2 )
- {
+ LOG.debug("Hadoop build version: " + version);
+
+ if (hadoopVersion2) {
LOG.debug("Using hadoop 2.x configuration properties.");
- DFS_SESSION_ID = "dfs.metrics.session-id";
- DFS_BLOCK_SIZE = "dfs.blocksize";
- DFS_PERMISSIONS = "dfs.permissions.enabled";
- }
- else //any older version
- {
+ DFS_BLOCKSIZE = "dfs.blocksize";
+ DFS_METRICS_SESSION_ID = "dfs.metrics.session-id";
+ DFS_PERMISSIONS_ENABLED = "dfs.permissions.enabled";
+ MR_CLUSTER_LOCAL_DIR = "mapreduce.cluster.local.dir";
+ MR_JOBTRACKER_ADDRESS = "mapreduce.jobtracker.address";
+ MR_JOBTRACKER_SYSTEM_DIR = "mapreduce.jobtracker.system.dir";
+ MR_TASK_IO_SORT_MB = "mapreduce.task.io.sort.mb";
+ MR_TASKTRACKER_TASKCONTROLLER = "mapreduce.tasktracker.taskcontroller";
+ } else { // any older version
LOG.debug("Using hadoop 1.x configuration properties.");
- DFS_SESSION_ID = "session.id";
- DFS_BLOCK_SIZE = "dfs.block.size";
- DFS_PERMISSIONS = "dfs.permissions";
+ DFS_BLOCKSIZE = "dfs.block.size";
+ DFS_METRICS_SESSION_ID = "session.id";
+ DFS_PERMISSIONS_ENABLED = "dfs.permissions";
+ MR_CLUSTER_LOCAL_DIR = "mapred.local.dir";
+ MR_JOBTRACKER_ADDRESS = "mapred.job.tracker";
+ MR_JOBTRACKER_SYSTEM_DIR = "mapred.system.dir";
+ MR_TASK_IO_SORT_MB = "io.sort.mb";
+ MR_TASKTRACKER_TASKCONTROLLER = "mapred.task.tracker.task-controller";
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
index 3382a0c..6d30b5e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
@@ -424,7 +424,7 @@ public class MRJobConfiguration
String uniqueSubdir = tmp.toString();
//unique local dir
- String[] dirlist = job.get("mapred.local.dir","/tmp").split(",");
+ String[] dirlist = job.get(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR,"/tmp").split(",");
StringBuilder sb2 = new StringBuilder();
for( String dir : dirlist ) {
if( sb2.length()>0 )
@@ -432,10 +432,10 @@ public class MRJobConfiguration
sb2.append(dir);
sb2.append( uniqueSubdir );
}
- job.set("mapred.local.dir", sb2.toString() );
+ job.set(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR, sb2.toString() );
//unique system dir
- job.set("mapred.system.dir", job.get("mapred.system.dir") + uniqueSubdir);
+ job.set(MRConfigurationNames.MR_JOBTRACKER_SYSTEM_DIR, job.get(MRConfigurationNames.MR_JOBTRACKER_SYSTEM_DIR) + uniqueSubdir);
//unique staging dir
job.set( "mapreduce.jobtracker.staging.root.dir", job.get("mapreduce.jobtracker.staging.root.dir") + uniqueSubdir );
@@ -444,12 +444,12 @@ public class MRJobConfiguration
public static String getLocalWorkingDirPrefix(JobConf job)
{
- return job.get("mapred.local.dir");
+ return job.get(MRConfigurationNames.MR_CLUSTER_LOCAL_DIR);
}
public static String getSystemWorkingDirPrefix(JobConf job)
{
- return job.get("mapred.system.dir");
+ return job.get(MRConfigurationNames.MR_JOBTRACKER_SYSTEM_DIR);
}
public static String getStagingWorkingDirPrefix(JobConf job)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1a2685a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java b/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
index af15807..18a2dfc 100644
--- a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
+++ b/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
@@ -676,7 +676,7 @@ public class YarnClusterAnalyzer
throw new YarnException("There are no available nodes in the yarn cluster");
// Now get the default cluster settings
- _remoteMRSortMem = (1024*1024) * conf.getLong("io.sort.mb",100); //100MB
+ _remoteMRSortMem = (1024*1024) * conf.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB,100); //100MB
//handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
//(for robustness we probe both: child and map configuration parameters)
@@ -693,7 +693,7 @@ public class YarnClusterAnalyzer
_remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);
//HDFS blocksize
- String blocksize = conf.get(MRConfigurationNames.DFS_BLOCK_SIZE, "134217728");
+ String blocksize = conf.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728");
_blocksize = Long.parseLong(blocksize);
minimalPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,