You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by om...@apache.org on 2011/03/08 06:59:54 UTC
svn commit: r1079245 - in
/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src:
java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java
Author: omalley
Date: Tue Mar 8 05:59:54 2011
New Revision: 1079245
URL: http://svn.apache.org/viewvc?rev=1079245&view=rev
Log:
commit d7db039091c36ceb52b9458e2b8306c7c09e8281
Author: Ravi Gummadi <gr...@yahoo-inc.com>
Date: Thu Jan 13 15:09:51 2011 +0530
Fix NPE issue in Gridmix caused by deprecated configuration properties
related to Distributed Cache in trace files of 0.20 history files.
Patch is available at
(gravi)
Modified:
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java
Modified: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java?rev=1079245&r1=1079244&r2=1079245&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java Tue Mar 8 05:59:54 2011
@@ -118,7 +118,16 @@ class DistributedCacheEmulator {
// Pseudo local file system where local FS based dist cache files are
// created by gridmix.
FileSystem pseudoLocalFs = null;
-
+
+ {
+ // Need to handle deprecation of these MapReduce-internal configuration
+ // properties as MapReduce doesn't handle their deprecation.
+ Configuration.addDeprecation("mapred.cache.files.filesizes",
+ new String[] {MRJobConfig.CACHE_FILES_SIZES});
+ Configuration.addDeprecation("mapred.cache.files.visibilities",
+ new String[] {MRJobConfig.CACHE_FILE_VISIBILITIES});
+ }
+
/**
* @param conf gridmix configuration
* @param ioPath <ioPath>/distributedCache/ is the gridmix Distributed
@@ -510,12 +519,16 @@ class DistributedCacheEmulator {
cacheFiles.add(mappedPath);
}
}
- // configure hdfs based dist cache files for simulated job
- conf.setStrings(MRJobConfig.CACHE_FILES,
- cacheFiles.toArray(new String[cacheFiles.size()]));
- // configure local FS based dist cache files for simulated job
- conf.setStrings("tmpfiles", localCacheFiles.toArray(
- new String[localCacheFiles.size()]));
+ if (cacheFiles.size() > 0) {
+ // configure hdfs based dist cache files for simulated job
+ conf.setStrings(MRJobConfig.CACHE_FILES,
+ cacheFiles.toArray(new String[cacheFiles.size()]));
+ }
+ if (localCacheFiles.size() > 0) {
+ // configure local FS based dist cache files for simulated job
+ conf.setStrings("tmpfiles", localCacheFiles.toArray(
+ new String[localCacheFiles.size()]));
+ }
}
}
}
Modified: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java?rev=1079245&r1=1079244&r2=1079245&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java Tue Mar 8 05:59:54 2011
@@ -129,18 +129,16 @@ public class TestDistCacheEmulation {
}
/**
- * Runs setupGenerateDistCacheData() on a new DistrbutedCacheEmulator and
- * and returns the jobConf. Fills the array <code>sortedFileSizes</code> that
- * can be used for validation.
- * Validation of exit code from setupGenerateDistCacheData() is done.
- * @param generate true if -generate option is specified
- * @param sortedFileSizes sorted distributed cache file sizes
+ * Configures 5 HDFS-based dist cache files and 1 local-FS-based dist cache
+ * file in the given Configuration object <code>conf</code>.
+ * @param conf configuration where dist cache config properties are to be set
+ * @param useOldProperties <code>true</code> if old config properties are to
+ * be set
+ * @return array of sorted HDFS-based distributed cache file sizes
* @throws IOException
- * @throws InterruptedException
*/
- private JobConf runSetupGenerateDistCacheData(boolean generate,
- long[] sortedFileSizes) throws IOException, InterruptedException {
- Configuration conf = new Configuration();
+ private long[] configureDummyDistCacheFiles(Configuration conf,
+ boolean useOldProperties) throws IOException {
String user = UserGroupInformation.getCurrentUser().getShortUserName();
conf.set(MRJobConfig.USER_NAME, user);
// Set some dummy dist cache files in gridmix configuration so that they go
@@ -152,17 +150,41 @@ public class TestDistCacheEmulation {
"subdir1/file5.txt",
"subdir2/file6.gz"};
String[] fileSizes = {"400", "2500", "700", "1200", "1500", "500"};
- // local FS based dist cache file whose path contains <user>/.staging is
- // not created on HDFS. So file size 2500 is not added to sortedFileSizes
- // and its visibility is not added to sortedVisibilities.
- System.arraycopy(new long[] {1500, 1200, 700, 500, 400}, 0,
- sortedFileSizes, 0, 5);
+
String[] visibilities = {"true", "false", "false", "true", "true", "false"};
String[] timeStamps = {"1234", "2345", "34567", "5434", "125", "134"};
- conf.setStrings(MRJobConfig.CACHE_FILES, distCacheFiles);
- conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, fileSizes);
- conf.setStrings(MRJobConfig.CACHE_FILE_VISIBILITIES, visibilities);
- conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timeStamps);
+ if (useOldProperties) {
+ conf.setStrings("mapred.cache.files", distCacheFiles);
+ conf.setStrings("mapred.cache.files.filesizes", fileSizes);
+ conf.setStrings("mapred.cache.files.visibilities", visibilities);
+ conf.setStrings("mapred.cache.files.timestamps", timeStamps);
+ } else {
+ conf.setStrings(MRJobConfig.CACHE_FILES, distCacheFiles);
+ conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, fileSizes);
+ conf.setStrings(MRJobConfig.CACHE_FILE_VISIBILITIES, visibilities);
+ conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timeStamps);
+ }
+ // local FS based dist cache file whose path contains <user>/.staging is
+ // not created on HDFS. So file size 2500 is not added to sortedFileSizes.
+ long[] sortedFileSizes = new long[] {1500, 1200, 700, 500, 400};
+ return sortedFileSizes;
+ }
+
+ /**
+ * Runs setupGenerateDistCacheData() on a new DistributedCacheEmulator and
+ * returns the jobConf. Fills the array <code>sortedFileSizes</code> that
+ * can be used for validation.
+ * Validation of exit code from setupGenerateDistCacheData() is done.
+ * @param generate true if -generate option is specified
+ * @param sortedFileSizes sorted HDFS-based distributed cache file sizes
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ private JobConf runSetupGenerateDistCacheData(boolean generate,
+ long[] sortedFileSizes) throws IOException, InterruptedException {
+ Configuration conf = new Configuration();
+ long[] fileSizes = configureDummyDistCacheFiles(conf, false);
+ System.arraycopy(fileSizes, 0, sortedFileSizes, 0, fileSizes.length);
// Job stories of all 3 jobs will have same dist cache files in their
// configurations
@@ -186,11 +208,26 @@ public class TestDistCacheEmulation {
expectedExitCode, exitCode);
// reset back
+ resetDistCacheConfigProperties(jobConf);
+ return jobConf;
+ }
+
+ /**
+ * Reset the config properties related to Distributed Cache in the given
+ * job configuration <code>jobConf</code>.
+ * @param jobConf job configuration
+ */
+ private void resetDistCacheConfigProperties(JobConf jobConf) {
+ // reset current/latest property names
jobConf.setStrings(MRJobConfig.CACHE_FILES, "");
jobConf.setStrings(MRJobConfig.CACHE_FILES_SIZES, "");
jobConf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, "");
jobConf.setStrings(MRJobConfig.CACHE_FILE_VISIBILITIES, "");
- return jobConf;
+ // reset old property names
+ jobConf.setStrings("mapred.cache.files", "");
+ jobConf.setStrings("mapred.cache.files.filesizes", "");
+ jobConf.setStrings("mapred.cache.files.visibilities", "");
+ jobConf.setStrings("mapred.cache.files.timestamps", "");
}
/**
@@ -358,4 +395,88 @@ public class TestDistCacheEmulation {
+ DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
+ " to false is not working.", dce.shouldEmulateDistCacheLoad());
}
+
+ /**
+ * Verify if DistributedCacheEmulator can configure distributed cache files
+ * for simulated job if job conf from trace had no dist cache files.
+ * @param conf configuration for the simulated job to be run
+ * @param jobConf job configuration of original cluster's job, obtained from
+ * trace
+ * @throws IOException
+ */
+ private void validateJobConfWithOutDCFiles(Configuration conf,
+ JobConf jobConf) throws IOException {
+ // Validate if Gridmix can configure dist cache files properly if there are
+ // neither HDFS-based nor localFS-based dist cache files in the trace for a
+ // job.
+ dce.configureDistCacheFiles(conf, jobConf);
+ assertNull("Distributed cache files configured by GridMix is wrong.",
+ conf.get(MRJobConfig.CACHE_FILES));
+ assertNull("Distributed cache files configured by Gridmix through -files "
+ + "option is wrong.", conf.get("tmpfiles"));
+ }
+
+ /**
+ * Verify if DistributedCacheEmulator can configure distributed cache files
+ * for simulated job if job conf from trace had HDFS-based dist cache files
+ * and local-FS-based dist cache files.
+ * <br>Also validate if Gridmix can handle/read deprecated config properties
+ * like mapred.cache.files.filesizes and mapred.cache.files.visibilities from
+ * trace file.
+ * @param conf configuration for the simulated job to be run
+ * @param jobConf job configuration of original cluster's job, obtained from
+ * trace
+ * @throws IOException
+ */
+ private void validateJobConfWithDCFiles(Configuration conf,
+ JobConf jobConf) throws IOException {
+ long[] sortedFileSizes = configureDummyDistCacheFiles(jobConf, true);
+
+ // Validate if Gridmix can handle deprecated config properties like
+ // mapred.cache.files.filesizes and mapred.cache.files.visibilities.
+ // 1 local FS based dist cache file and 5 HDFS based dist cache files. So
+ // total expected dist cache files count is 6.
+ assertEquals("Gridmix is not able to extract dist cache file sizes.",
+ 6, jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES).length);
+ assertEquals("Gridmix is not able to extract dist cache file visibilities.",
+ 6, jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES).length);
+
+ dce.configureDistCacheFiles(conf, jobConf);
+
+ assertEquals("Configuring of HDFS-based dist cache files by gridmix is "
+ + "wrong.", sortedFileSizes.length,
+ conf.getStrings(MRJobConfig.CACHE_FILES).length);
+ assertEquals("Configuring of local-FS-based dist cache files by gridmix is "
+ + "wrong.", 1, conf.getStrings("tmpfiles").length);
+ }
+
+ /**
+ * Test if Gridmix can configure config properties related to Distributed
+ * Cache properly. Also verify if Gridmix can handle deprecated config
+ * properties related to Distributed Cache.
+ * @throws IOException
+ */
+ @Test
+ public void testDistCacheFilesConfiguration() throws IOException {
+ Configuration conf = new Configuration();
+ JobConf jobConf = GridmixTestUtils.mrCluster.createJobConf(
+ new JobConf(conf));
+ Path ioPath = new Path("testDistCacheEmulationConfigurability")
+ .makeQualified(GridmixTestUtils.dfs);
+ FileSystem fs = FileSystem.get(jobConf);
+ FileSystem.mkdirs(fs, ioPath, new FsPermission((short)0777));
+
+ // default config
+ dce = createDistributedCacheEmulator(jobConf, ioPath, false);
+ assertTrue("Default configuration of "
+ + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
+ + " is wrong.", dce.shouldEmulateDistCacheLoad());
+
+ validateJobConfWithOutDCFiles(conf, jobConf);
+
+ // Validate if Gridmix can configure dist cache files properly if there are
+ // HDFS-based dist cache files and localFS-based dist cache files in trace
+ // for a job. Set old config properties and validate.
+ validateJobConfWithDCFiles(conf, jobConf);
+ }
}