Posted to mapreduce-commits@hadoop.apache.org by jl...@apache.org on 2013/11/11 20:22:38 UTC

svn commit: r1540813 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/ hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/ h...

Author: jlowe
Date: Mon Nov 11 19:22:38 2013
New Revision: 1540813

URL: http://svn.apache.org/r1540813
Log:
MAPREDUCE-5186. mapreduce.job.max.split.locations causes some splits created by CombineFileInputFormat to fail. Contributed by Robert Parker and Jason Lowe
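
In short: JobSplitWriter previously threw an IOException whenever a split
reported more block locations than mapreduce.job.max.split.locations, which
could fail job submission for splits that span many blocks, such as those
produced by CombineFileInputFormat. With this change the writer logs a warning
and truncates the location list instead. Condensed from the hunks below
(variable names as they appear in the patch):

    String[] locations = split.getLocations();
    if (locations.length > maxBlockLocations) {
      LOG.warn("Max block location exceeded for split: " + split
          + " splitsize: " + locations.length
          + " maxsize: " + maxBlockLocations);
      // Keep only the first maxBlockLocations entries rather than failing the job.
      locations = Arrays.copyOf(locations, maxBlockLocations);
    }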

Added:
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java   (with props)
Removed:
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestBlockLimits.java
Modified:
    hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java
    hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1540813&r1=1540812&r2=1540813&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Mon Nov 11 19:22:38 2013
@@ -197,6 +197,10 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-5585. TestCopyCommitter#testNoCommitAction Fails on JDK7
     (jeagles)
 
+    MAPREDUCE-5186. mapreduce.job.max.split.locations causes some splits
+    created by CombineFileInputFormat to fail (Robert Parker and Jason Lowe
+    via jlowe)
+
 Release 2.2.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java?rev=1540813&r1=1540812&r2=1540813&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java Mon Nov 11 19:22:38 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.mapreduce.spli
 
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -39,6 +40,9 @@ import org.apache.hadoop.mapreduce.split
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 /**
  * The class that is used by the Job clients to write splits (both the meta
  * and the raw bytes parts)
@@ -47,6 +51,7 @@ import org.apache.hadoop.classification.
 @InterfaceStability.Unstable
 public class JobSplitWriter {
 
+  private static final Log LOG = LogFactory.getLog(JobSplitWriter.class);
   private static final int splitVersion = JobSplit.META_SPLIT_VERSION;
   private static final byte[] SPLIT_FILE_HEADER;
 
@@ -129,9 +134,10 @@ public class JobSplitWriter {
         long currCount = out.getPos();
         String[] locations = split.getLocations();
         if (locations.length > maxBlockLocations) {
-          throw new IOException("Max block location exceeded for split: "
+          LOG.warn("Max block location exceeded for split: "
               + split + " splitsize: " + locations.length +
               " maxsize: " + maxBlockLocations);
+          locations = Arrays.copyOf(locations, maxBlockLocations);
         }
         info[i++] = 
           new JobSplit.SplitMetaInfo( 
@@ -159,9 +165,10 @@ public class JobSplitWriter {
         long currLen = out.getPos();
         String[] locations = split.getLocations();
         if (locations.length > maxBlockLocations) {
-          throw new IOException("Max block location exceeded for split: "
+          LOG.warn("Max block location exceeded for split: "
               + split + " splitsize: " + locations.length +
               " maxsize: " + maxBlockLocations);
+          locations = Arrays.copyOf(locations, maxBlockLocations);
         }
         info[i++] = new JobSplit.SplitMetaInfo( 
             locations, offset,

Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1540813&r1=1540812&r2=1540813&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Mon Nov 11 19:22:38 2013
@@ -83,6 +83,14 @@
 </property>
 
 <property>
+    <name>mapreduce.job.max.split.locations</name>
+    <value>10</value>
+    <description>The max number of block locations to store for each split for 
+    locality calculation.
+    </description>
+</property>
+
+<property>
   <name>mapreduce.job.split.metainfo.maxsize</name>
   <value>10000000</value>
   <description>The maximum permissible size of the split metainfo file. 
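
The commit also documents mapreduce.job.max.split.locations in
mapred-default.xml with its default of 10. A job that legitimately needs more
locations per split can raise the cap before submission. The following is a
hypothetical client-side sketch, not part of this commit; the class and job
names are placeholders, and it assumes the standard Configuration/Job API:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class RaiseSplitLocationLimit {                // hypothetical class name
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Same property documented above; the new TestJobSplitWriter sets it
        // through MRConfig.MAX_BLOCK_LOCATIONS_KEY. Default is 10.
        conf.setInt("mapreduce.job.max.split.locations", 20);
        Job job = Job.getInstance(conf, "example-job");   // placeholder job name
        // ... configure input/output formats and submit as usual ...
      }
    }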

Added: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java?rev=1540813&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java (added)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java Mon Nov 11 19:22:38 2013
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.split;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.MRConfig;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.junit.Test;
+
+public class TestJobSplitWriter {
+
+  private static final File TEST_DIR = new File(
+      System.getProperty("test.build.data",
+          System.getProperty("java.io.tmpdir")), "TestJobSplitWriter");
+
+  @Test
+  public void testMaxBlockLocationsNewSplits() throws Exception {
+    TEST_DIR.mkdirs();
+    try {
+      Configuration conf = new Configuration();
+      conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4);
+      Path submitDir = new Path(TEST_DIR.getAbsolutePath());
+      FileSystem fs = FileSystem.getLocal(conf);
+      FileSplit split = new FileSplit(new Path("/some/path"), 0, 1,
+          new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
+      JobSplitWriter.createSplitFiles(submitDir, conf, fs,
+          new FileSplit[] { split });
+      JobSplit.TaskSplitMetaInfo[] infos =
+          SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf,
+              submitDir);
+      assertEquals("unexpected number of splits", 1, infos.length);
+      assertEquals("unexpected number of split locations",
+          4, infos[0].getLocations().length);
+    } finally {
+      FileUtil.fullyDelete(TEST_DIR);
+    }
+  }
+
+  @Test
+  public void testMaxBlockLocationsOldSplits() throws Exception {
+    TEST_DIR.mkdirs();
+    try {
+      Configuration conf = new Configuration();
+      conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4);
+      Path submitDir = new Path(TEST_DIR.getAbsolutePath());
+      FileSystem fs = FileSystem.getLocal(conf);
+      org.apache.hadoop.mapred.FileSplit split =
+          new org.apache.hadoop.mapred.FileSplit(new Path("/some/path"), 0, 1,
+              new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
+      JobSplitWriter.createSplitFiles(submitDir, conf, fs,
+          new org.apache.hadoop.mapred.InputSplit[] { split });
+      JobSplit.TaskSplitMetaInfo[] infos =
+          SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf,
+              submitDir);
+      assertEquals("unexpected number of splits", 1, infos.length);
+      assertEquals("unexpected number of split locations",
+          4, infos[0].getLocations().length);
+    } finally {
+      FileUtil.fullyDelete(TEST_DIR);
+    }
+  }
+}

Propchange: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/split/TestJobSplitWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native