Posted to mapreduce-commits@hadoop.apache.org by ss...@apache.org on 2012/10/16 02:03:53 UTC

svn commit: r1398581 [5/9] - in /hadoop/common/branches/MR-3902/hadoop-mapreduce-project: ./ bin/ conf/ hadoop-mapreduce-client/ hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/ hadoop-mapreduce-client/hadoop-...

Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java Tue Oct 16 00:02:55 2012
@@ -41,10 +41,10 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
@@ -80,15 +80,24 @@ public class TestMRJobs {
   private static final Log LOG = LogFactory.getLog(TestMRJobs.class);
 
   protected static MiniMRYarnCluster mrCluster;
+  protected static MiniDFSCluster dfsCluster;
 
   private static Configuration conf = new Configuration();
   private static FileSystem localFs;
+  private static FileSystem remoteFs;
   static {
     try {
       localFs = FileSystem.getLocal(conf);
     } catch (IOException io) {
       throw new RuntimeException("problem getting local fs", io);
     }
+    try {
+      dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
+        .format(true).racks(null).build();
+      remoteFs = dfsCluster.getFileSystem();
+    } catch (IOException io) {
+      throw new RuntimeException("problem starting mini dfs cluster", io);
+    }
   }
 
   private static Path TEST_ROOT_DIR = new Path("target",
@@ -107,6 +116,8 @@ public class TestMRJobs {
     if (mrCluster == null) {
       mrCluster = new MiniMRYarnCluster(TestMRJobs.class.getName(), 3);
       Configuration conf = new Configuration();
+      conf.set("fs.defaultFS", remoteFs.getUri().toString());   // use HDFS
+      conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
       mrCluster.init(conf);
       mrCluster.start();
     }
@@ -123,6 +134,10 @@ public class TestMRJobs {
       mrCluster.stop();
       mrCluster = null;
     }
+    if (dfsCluster != null) {
+      dfsCluster.shutdown();
+      dfsCluster = null;
+    }
   }
 
   @Test
@@ -403,7 +418,6 @@ public class TestMRJobs {
       Configuration conf = context.getConfiguration();
       Path[] files = context.getLocalCacheFiles();
       Path[] archives = context.getLocalCacheArchives();
-      FileSystem fs = LocalFileSystem.get(conf);
 
      // Check that 4 (2 + appjar + DistributedCacheChecker jar) files
       // and 2 archives are present
@@ -411,13 +425,13 @@ public class TestMRJobs {
       Assert.assertEquals(2, archives.length);
 
       // Check lengths of the files
-      Assert.assertEquals(1, fs.getFileStatus(files[1]).getLen());
-      Assert.assertTrue(fs.getFileStatus(files[2]).getLen() > 1);
+      Assert.assertEquals(1, localFs.getFileStatus(files[1]).getLen());
+      Assert.assertTrue(localFs.getFileStatus(files[2]).getLen() > 1);
 
       // Check extraction of the archive
-      Assert.assertTrue(fs.exists(new Path(archives[0],
+      Assert.assertTrue(localFs.exists(new Path(archives[0],
           "distributed.jar.inside3")));
-      Assert.assertTrue(fs.exists(new Path(archives[1],
+      Assert.assertTrue(localFs.exists(new Path(archives[1],
           "distributed.jar.inside4")));
 
       // Check the class loaders
@@ -448,8 +462,7 @@ public class TestMRJobs {
     }
   }
 
-  @Test
-  public void testDistributedCache() throws Exception {
+  public void _testDistributedCache(String jobJarPath) throws Exception {
     if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
       LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
            + " not found. Not running test.");
@@ -470,11 +483,13 @@ public class TestMRJobs {
     
     // Set the job jar to a new "dummy" jar so we can check that its extracted 
     // properly
-    job.setJar(makeJobJarWithLib(TEST_ROOT_DIR.toUri().toString()));
+    job.setJar(jobJarPath);
     // Because the job jar is a "dummy" jar, we need to include the jar with
     // DistributedCacheChecker or it won't be able to find it
-    job.addFileToClassPath(new Path(
-            JarFinder.getJar(DistributedCacheChecker.class)));
+    Path distributedCacheCheckerJar = new Path(
+            JarFinder.getJar(DistributedCacheChecker.class));
+    job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(
+            localFs.getUri(), distributedCacheCheckerJar.getParent()));
     
     job.setMapperClass(DistributedCacheChecker.class);
     job.setOutputFormatClass(NullOutputFormat.class);
@@ -484,7 +499,9 @@ public class TestMRJobs {
     job.addCacheFile(
         new URI(first.toUri().toString() + "#distributed.first.symlink"));
     job.addFileToClassPath(second);
-    job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
+    // The AppMaster jar itself
+    job.addFileToClassPath(
+            APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent())); 
     job.addArchiveToClassPath(third);
     job.addCacheArchive(fourth.toUri());
     job.setMaxMapAttempts(1); // speed up failures
@@ -497,6 +514,23 @@ public class TestMRJobs {
                       " but didn't Match Job ID " + jobId ,
           trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
   }
+  
+  @Test
+  public void testDistributedCache() throws Exception {
+    // Test with a local (file:///) Job Jar
+    Path localJobJarPath = makeJobJarWithLib(TEST_ROOT_DIR.toUri().toString());
+    _testDistributedCache(localJobJarPath.toUri().toString());
+    
+    // Test with a remote (hdfs://) Job Jar
+    Path remoteJobJarPath = new Path(remoteFs.getUri().toString() + "/",
+            localJobJarPath.getName());
+    remoteFs.moveFromLocalFile(localJobJarPath, remoteJobJarPath);
+    File localJobJarFile = new File(localJobJarPath.toUri().toString());
+    if (localJobJarFile.exists()) {     // just to make sure
+      localJobJarFile.delete();
+    }
+    _testDistributedCache(remoteJobJarPath.toUri().toString());
+  }
 
   private Path createTempFile(String filename, String contents)
       throws IOException {
@@ -522,7 +556,7 @@ public class TestMRJobs {
     return p;
   }
   
-  private String makeJobJarWithLib(String testDir) throws FileNotFoundException, 
+  private Path makeJobJarWithLib(String testDir) throws FileNotFoundException, 
       IOException{
     Path jobJarPath = new Path(testDir, "thejob.jar");
     FileOutputStream fos =
@@ -535,7 +569,7 @@ public class TestMRJobs {
             new Path(testDir, "lib2.jar").toUri().getPath()));
     jos.close();
     localFs.setPermission(jobJarPath, new FsPermission("700"));
-    return jobJarPath.toUri().toString();
+    return jobJarPath;
   }
   
   private void createAndAddJarToJar(JarOutputStream jos, File jarFile) 

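For context, the TestMRJobs change above replaces the purely local test filesystem with a real MiniDFSCluster, so staging-dir and job-jar handling is exercised against hdfs:// URIs rather than file:///. Stripped of the test-class scaffolding, the setup/teardown pattern the patch uses looks roughly like this (a minimal sketch reusing only calls that appear in the diff):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.MiniDFSCluster;

    public class MiniDfsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Two datanodes, freshly formatted, default rack placement -- as in the patch.
        MiniDFSCluster dfsCluster = new MiniDFSCluster.Builder(conf)
            .numDataNodes(2).format(true).racks(null).build();
        FileSystem remoteFs = dfsCluster.getFileSystem();
        try {
          // Point clients at the mini cluster instead of the local filesystem.
          conf.set("fs.defaultFS", remoteFs.getUri().toString());
          System.out.println("default FS: " + remoteFs.getUri());
        } finally {
          dfsCluster.shutdown();   // mirrors the teardown added above
        }
      }
    }
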
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java Tue Oct 16 00:02:55 2012
@@ -177,8 +177,13 @@ public class TestYARNRunner extends Test
   @Test
   public void testResourceMgrDelegate() throws Exception {
    /* we do not want a mock of resourcemgr delegate */
-    ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class);
-    ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf, clientRMProtocol);
+    final ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class);
+    ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf) {
+      @Override
+      public synchronized void start() {
+        this.rmClient = clientRMProtocol;
+      }
+    };
     /* make sure kill calls finish application master */
     when(clientRMProtocol.forceKillApplication(any(KillApplicationRequest.class)))
     .thenReturn(null);

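The TestYARNRunner hunk stops passing the mock ClientRMProtocol through a constructor and instead uses an anonymous subclass that plants the mock when start() runs, so no real RPC proxy is ever built. The override-and-inject pattern in isolation, using hypothetical stand-in types rather than the real YARN classes:

    import static org.mockito.Mockito.*;

    // Hypothetical stand-ins for ResourceMgrDelegate / ClientRMProtocol,
    // used only to illustrate the pattern from the diff above.
    interface RpcClient { String forceKill(String appId); }

    class Delegate {
      protected RpcClient rmClient;
      public synchronized void start() { /* normally builds a real RPC proxy */ }
      String kill(String appId) { return rmClient.forceKill(appId); }
    }

    public class InjectionSketch {
      public static void main(String[] args) {
        final RpcClient mockClient = mock(RpcClient.class);
        when(mockClient.forceKill("app_1")).thenReturn("killed");
        // Override the lifecycle hook so the mock is wired in before any call.
        Delegate delegate = new Delegate() {
          @Override
          public synchronized void start() { this.rmClient = mockClient; }
        };
        delegate.start();
        System.out.println(delegate.kill("app_1"));   // -> killed
      }
    }
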
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml Tue Oct 16 00:02:55 2012
@@ -133,7 +133,31 @@
       <groupId>org.jboss.netty</groupId>
       <artifactId>netty</artifactId>
     </dependency>
-
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+      <scope>provided</scope>
+    </dependency>
   </dependencies>
  
   <build>
@@ -148,6 +172,18 @@
           <effort>Max</effort>
        </configuration>
      </plugin>
+     <plugin>
+       <groupId>org.apache.maven.plugins</groupId>
+       <artifactId>maven-surefire-plugin</artifactId>
+       <configuration>
+         <properties>
+           <property>
+             <name>listener</name>
+             <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+           </property>
+         </properties>
+       </configuration>
+     </plugin>
     </plugins>
   </build>
  

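The new surefire stanza registers a JUnit run listener in every forked test JVM; org.apache.hadoop.test.TimedOutTestsListener is the hadoop-common helper that, on a timeout-style failure, prints a thread dump to help diagnose hung tests. Any class registered through the surefire listener property is an ordinary JUnit 4 RunListener; a minimal sketch of the shape such a class takes (not the actual TimedOutTestsListener source):

    import org.junit.runner.notification.Failure;
    import org.junit.runner.notification.RunListener;

    // Skeleton of a surefire-registered listener: surefire instantiates it
    // and invokes these hooks as tests run in the forked JVM.
    public class StackDumpListener extends RunListener {
      @Override
      public void testFailure(Failure failure) throws Exception {
        System.err.println("FAILED: " + failure.getTestHeader());
        System.err.println(failure.getTrace());
      }
    }
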
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java Tue Oct 16 00:02:55 2012
@@ -27,7 +27,6 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
-import java.util.Iterator;
 import java.util.Random;
 
 import org.apache.commons.logging.Log;
@@ -82,6 +81,7 @@ public class DBCountPageView extends Con
   
   private Connection connection;
   private boolean initialized = false;
+  private boolean isOracle = false;
 
   private static final String[] AccessFieldNames = {"url", "referrer", "time"};
   private static final String[] PageviewFieldNames = {"url", "pageview"};
@@ -102,7 +102,9 @@ public class DBCountPageView extends Con
   
   private void createConnection(String driverClassName
       , String url) throws Exception {
-    
+    if (driverClassName.toLowerCase().contains("oracle")) {
+      isOracle = true;
+    }
     Class.forName(driverClassName);
     connection = DriverManager.getConnection(url);
     connection.setAutoCommit(false);
@@ -142,7 +144,7 @@ public class DBCountPageView extends Con
   }
   
   private void dropTables() {
-    String dropAccess = "DROP TABLE Access";
+    String dropAccess = "DROP TABLE HAccess";
     String dropPageview = "DROP TABLE Pageview";
     Statement st = null;
     try {
@@ -157,18 +159,21 @@ public class DBCountPageView extends Con
   }
   
   private void createTables() throws SQLException {
-
+    String dataType = "BIGINT NOT NULL";
+    if (isOracle) {
+      dataType = "NUMBER(19) NOT NULL";
+    }
     String createAccess = 
       "CREATE TABLE " +
-      "Access(url      VARCHAR(100) NOT NULL," +
+      "HAccess(url      VARCHAR(100) NOT NULL," +
             " referrer VARCHAR(100)," +
-            " time     BIGINT NOT NULL, " +
+            " time     " + dataType + ", " +
             " PRIMARY KEY (url, time))";
 
     String createPageview = 
       "CREATE TABLE " +
       "Pageview(url      VARCHAR(100) NOT NULL," +
-              " pageview     BIGINT NOT NULL, " +
+              " pageview     " + dataType + ", " +
                " PRIMARY KEY (url))";
     
     Statement st = connection.createStatement();
@@ -189,7 +194,7 @@ public class DBCountPageView extends Con
     PreparedStatement statement = null ;
     try {
       statement = connection.prepareStatement(
-          "INSERT INTO Access(url, referrer, time)" +
+          "INSERT INTO HAccess(url, referrer, time)" +
           " VALUES (?, ?, ?)");
 
       Random random = new Random();
@@ -248,7 +253,7 @@ public class DBCountPageView extends Con
   /**Verifies the results are correct */
   private boolean verify() throws SQLException {
     //check total num pageview
-    String countAccessQuery = "SELECT COUNT(*) FROM Access";
+    String countAccessQuery = "SELECT COUNT(*) FROM HAccess";
     String sumPageviewQuery = "SELECT SUM(pageview) FROM Pageview";
     Statement st = null;
     ResultSet rs = null;
@@ -396,7 +401,7 @@ public class DBCountPageView extends Con
 
     DBConfiguration.configureDB(conf, driverClassName, url);
 
-    Job job = new Job(conf);
+    Job job = Job.getInstance(conf);
         
     job.setJobName("Count Pageviews of URLs");
     job.setJarByClass(DBCountPageView.class);
@@ -404,7 +409,7 @@ public class DBCountPageView extends Con
     job.setCombinerClass(LongSumReducer.class);
     job.setReducerClass(PageviewReducer.class);
 
-    DBInputFormat.setInput(job, AccessRecord.class, "Access"
+    DBInputFormat.setInput(job, AccessRecord.class, "HAccess"
         , null, "url", AccessFieldNames);
 
     DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

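The DBCountPageView changes make the example portable beyond HSQLDB: the Access table becomes HAccess (ACCESS is a reserved word in Oracle), and since Oracle has no BIGINT, the column type is chosen from the JDBC driver class name. That dialect switch, pulled out as a standalone sketch (hypothetical helper and class names):

    // Hypothetical helper mirroring the isOracle branch added above.
    public class SqlDialectSketch {
      static String longColumnType(String driverClassName) {
        // Oracle lacks BIGINT; NUMBER(19) covers the same 64-bit range.
        if (driverClassName.toLowerCase().contains("oracle")) {
          return "NUMBER(19) NOT NULL";
        }
        return "BIGINT NOT NULL";
      }

      public static void main(String[] args) {
        System.out.println(longColumnType("oracle.jdbc.driver.OracleDriver"));
        System.out.println(longColumnType("org.hsqldb.jdbcDriver"));   // BIGINT NOT NULL
      }
    }
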
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java Tue Oct 16 00:02:55 2012
@@ -211,7 +211,7 @@ public class SecondarySort {
     Configuration conf = new Configuration();
     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
     if (otherArgs.length != 2) {
-      System.err.println("Usage: secondarysrot <in> <out>");
+      System.err.println("Usage: secondarysort <in> <out>");
       System.exit(2);
     }
     Job job = new Job(conf, "secondary sort");

Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java Tue Oct 16 00:02:55 2012
@@ -1,196 +1,196 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordMean extends Configured implements Tool {
-
-  private double mean = 0;
-
-  private final static Text COUNT = new Text("count");
-  private final static Text LENGTH = new Text("length");
-  private final static LongWritable ONE = new LongWritable(1);
-
-  /**
-   * Maps words from line of text into 2 key-value pairs; one key-value pair for
-   * counting the word, another for counting its length.
-   */
-  public static class WordMeanMapper extends
-      Mapper<Object, Text, Text, LongWritable> {
-
-    private LongWritable wordLen = new LongWritable();
-
-    /**
-     * Emits 2 key-value pairs for counting the word and its length. Outputs are
-     * (Text, LongWritable).
-     * 
-     * @param value
-     *          This will be a line of text coming in from our input file.
-     */
-    public void map(Object key, Text value, Context context)
-        throws IOException, InterruptedException {
-      StringTokenizer itr = new StringTokenizer(value.toString());
-      while (itr.hasMoreTokens()) {
-        String string = itr.nextToken();
-        this.wordLen.set(string.length());
-        context.write(LENGTH, this.wordLen);
-        context.write(COUNT, ONE);
-      }
-    }
-  }
-
-  /**
-   * Performs integer summation of all the values for each key.
-   */
-  public static class WordMeanReducer extends
-      Reducer<Text, LongWritable, Text, LongWritable> {
-
-    private LongWritable sum = new LongWritable();
-
-    /**
-     * Sums all the individual values within the iterator and writes them to the
-     * same key.
-     * 
-     * @param key
-     *          This will be one of 2 constants: LENGTH_STR or COUNT_STR.
-     * @param values
-     *          This will be an iterator of all the values associated with that
-     *          key.
-     */
-    public void reduce(Text key, Iterable<LongWritable> values, Context context)
-        throws IOException, InterruptedException {
-
-      int theSum = 0;
-      for (LongWritable val : values) {
-        theSum += val.get();
-      }
-      sum.set(theSum);
-      context.write(key, sum);
-    }
-  }
-
-  /**
-   * Reads the output file and parses the summation of lengths, and the word
-   * count, to perform a quick calculation of the mean.
-   * 
-   * @param path
-   *          The path to find the output file in. Set in main to the output
-   *          directory.
-   * @throws IOException
-   *           If it cannot access the output directory, we throw an exception.
-   */
-  private double readAndCalcMean(Path path, Configuration conf)
-      throws IOException {
-    FileSystem fs = FileSystem.get(conf);
-    Path file = new Path(path, "part-r-00000");
-
-    if (!fs.exists(file))
-      throw new IOException("Output not found!");
-
-    BufferedReader br = null;
-
-    // average = total sum / number of elements;
-    try {
-      br = new BufferedReader(new InputStreamReader(fs.open(file)));
-
-      long count = 0;
-      long length = 0;
-
-      String line;
-      while ((line = br.readLine()) != null) {
-        StringTokenizer st = new StringTokenizer(line);
-
-        // grab type
-        String type = st.nextToken();
-
-        // differentiate
-        if (type.equals(COUNT.toString())) {
-          String countLit = st.nextToken();
-          count = Long.parseLong(countLit);
-        } else if (type.equals(LENGTH.toString())) {
-          String lengthLit = st.nextToken();
-          length = Long.parseLong(lengthLit);
-        }
-      }
-
-      double theMean = (((double) length) / ((double) count));
-      System.out.println("The mean is: " + theMean);
-      return theMean;
-    } finally {
-      br.close();
-    }
-  }
-
-  public static void main(String[] args) throws Exception {
-    ToolRunner.run(new Configuration(), new WordMean(), args);
-  }
-
-  @Override
-  public int run(String[] args) throws Exception {
-    if (args.length != 2) {
-      System.err.println("Usage: wordmean <in> <out>");
-      return 0;
-    }
-
-    Configuration conf = getConf();
-
-    @SuppressWarnings("deprecation")
-    Job job = new Job(conf, "word mean");
-    job.setJarByClass(WordMean.class);
-    job.setMapperClass(WordMeanMapper.class);
-    job.setCombinerClass(WordMeanReducer.class);
-    job.setReducerClass(WordMeanReducer.class);
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(LongWritable.class);
-    FileInputFormat.addInputPath(job, new Path(args[0]));
-    Path outputpath = new Path(args[1]);
-    FileOutputFormat.setOutputPath(job, outputpath);
-    boolean result = job.waitForCompletion(true);
-    mean = readAndCalcMean(outputpath, conf);
-
-    return (result ? 0 : 1);
-  }
-
-  /**
-   * Only valuable after run() called.
-   * 
-   * @return Returns the mean value.
-   */
-  public double getMean() {
-    return mean;
-  }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordMean extends Configured implements Tool {
+
+  private double mean = 0;
+
+  private final static Text COUNT = new Text("count");
+  private final static Text LENGTH = new Text("length");
+  private final static LongWritable ONE = new LongWritable(1);
+
+  /**
+   * Maps words from line of text into 2 key-value pairs; one key-value pair for
+   * counting the word, another for counting its length.
+   */
+  public static class WordMeanMapper extends
+      Mapper<Object, Text, Text, LongWritable> {
+
+    private LongWritable wordLen = new LongWritable();
+
+    /**
+     * Emits 2 key-value pairs for counting the word and its length. Outputs are
+     * (Text, LongWritable).
+     * 
+     * @param value
+     *          This will be a line of text coming in from our input file.
+     */
+    public void map(Object key, Text value, Context context)
+        throws IOException, InterruptedException {
+      StringTokenizer itr = new StringTokenizer(value.toString());
+      while (itr.hasMoreTokens()) {
+        String string = itr.nextToken();
+        this.wordLen.set(string.length());
+        context.write(LENGTH, this.wordLen);
+        context.write(COUNT, ONE);
+      }
+    }
+  }
+
+  /**
+   * Performs integer summation of all the values for each key.
+   */
+  public static class WordMeanReducer extends
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
+    private LongWritable sum = new LongWritable();
+
+    /**
+     * Sums all the individual values within the iterator and writes them to the
+     * same key.
+     * 
+     * @param key
+     *          This will be one of 2 constants: LENGTH_STR or COUNT_STR.
+     * @param values
+     *          This will be an iterator of all the values associated with that
+     *          key.
+     */
+    public void reduce(Text key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
+
+      int theSum = 0;
+      for (LongWritable val : values) {
+        theSum += val.get();
+      }
+      sum.set(theSum);
+      context.write(key, sum);
+    }
+  }
+
+  /**
+   * Reads the output file and parses the summation of lengths, and the word
+   * count, to perform a quick calculation of the mean.
+   * 
+   * @param path
+   *          The path to find the output file in. Set in main to the output
+   *          directory.
+   * @throws IOException
+   *           If it cannot access the output directory, we throw an exception.
+   */
+  private double readAndCalcMean(Path path, Configuration conf)
+      throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    Path file = new Path(path, "part-r-00000");
+
+    if (!fs.exists(file))
+      throw new IOException("Output not found!");
+
+    BufferedReader br = null;
+
+    // average = total sum / number of elements;
+    try {
+      br = new BufferedReader(new InputStreamReader(fs.open(file)));
+
+      long count = 0;
+      long length = 0;
+
+      String line;
+      while ((line = br.readLine()) != null) {
+        StringTokenizer st = new StringTokenizer(line);
+
+        // grab type
+        String type = st.nextToken();
+
+        // differentiate
+        if (type.equals(COUNT.toString())) {
+          String countLit = st.nextToken();
+          count = Long.parseLong(countLit);
+        } else if (type.equals(LENGTH.toString())) {
+          String lengthLit = st.nextToken();
+          length = Long.parseLong(lengthLit);
+        }
+      }
+
+      double theMean = (((double) length) / ((double) count));
+      System.out.println("The mean is: " + theMean);
+      return theMean;
+    } finally {
+      br.close();
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new Configuration(), new WordMean(), args);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: wordmean <in> <out>");
+      return 0;
+    }
+
+    Configuration conf = getConf();
+
+    @SuppressWarnings("deprecation")
+    Job job = new Job(conf, "word mean");
+    job.setJarByClass(WordMean.class);
+    job.setMapperClass(WordMeanMapper.class);
+    job.setCombinerClass(WordMeanReducer.class);
+    job.setReducerClass(WordMeanReducer.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(LongWritable.class);
+    FileInputFormat.addInputPath(job, new Path(args[0]));
+    Path outputpath = new Path(args[1]);
+    FileOutputFormat.setOutputPath(job, outputpath);
+    boolean result = job.waitForCompletion(true);
+    mean = readAndCalcMean(outputpath, conf);
+
+    return (result ? 0 : 1);
+  }
+
+  /**
+   * Only valuable after run() called.
+   * 
+   * @return Returns the mean value.
+   */
+  public double getMean() {
+    return mean;
+  }
 }
\ No newline at end of file

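WordMean above computes the mean word length entirely from two reducer outputs: the summed LENGTH values divided by the COUNT total. A quick worked check of readAndCalcMean()'s arithmetic on a hypothetical input:

    public class WordMeanCheck {
      public static void main(String[] args) {
        // Hypothetical input "hello world hi": the mapper emits lengths
        // 5 + 5 + 2 = 12 under LENGTH and three ONEs under COUNT.
        long count = 3;
        long length = 12;
        double theMean = ((double) length) / ((double) count);
        System.out.println("The mean is: " + theMean);   // prints 4.0
      }
    }
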
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java Tue Oct 16 00:02:55 2012
@@ -1,208 +1,208 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskCounter;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordMedian extends Configured implements Tool {
-
-  private double median = 0;
-  private final static IntWritable ONE = new IntWritable(1);
-
-  /**
-   * Maps words from line of text into a key-value pair; the length of the word
-   * as the key, and 1 as the value.
-   */
-  public static class WordMedianMapper extends
-      Mapper<Object, Text, IntWritable, IntWritable> {
-
-    private IntWritable length = new IntWritable();
-
-    /**
-     * Emits a key-value pair for counting the word. Outputs are (IntWritable,
-     * IntWritable).
-     * 
-     * @param value
-     *          This will be a line of text coming in from our input file.
-     */
-    public void map(Object key, Text value, Context context)
-        throws IOException, InterruptedException {
-      StringTokenizer itr = new StringTokenizer(value.toString());
-      while (itr.hasMoreTokens()) {
-        String string = itr.nextToken();
-        length.set(string.length());
-        context.write(length, ONE);
-      }
-    }
-  }
-
-  /**
-   * Performs integer summation of all the values for each key.
-   */
-  public static class WordMedianReducer extends
-      Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
-
-    private IntWritable val = new IntWritable();
-
-    /**
-     * Sums all the individual values within the iterator and writes them to the
-     * same key.
-     * 
-     * @param key
-     *          This will be a length of a word that was read.
-     * @param values
-     *          This will be an iterator of all the values associated with that
-     *          key.
-     */
-    public void reduce(IntWritable key, Iterable<IntWritable> values,
-        Context context) throws IOException, InterruptedException {
-
-      int sum = 0;
-      for (IntWritable value : values) {
-        sum += value.get();
-      }
-      val.set(sum);
-      context.write(key, val);
-    }
-  }
-
-  /**
-   * This is a standard program to read and find a median value based on a file
-   * of word counts such as: 1 456, 2 132, 3 56... Where the first values are
-   * the word lengths and the following values are the number of times that
-   * words of that length appear.
-   * 
-   * @param path
-   *          The path to read the HDFS file from (part-r-00000...00001...etc).
-   * @param medianIndex1
-   *          The first length value to look for.
-   * @param medianIndex2
-   *          The second length value to look for (will be the same as the first
-   *          if there are an even number of words total).
-   * @throws IOException
-   *           If file cannot be found, we throw an exception.
-   * */
-  private double readAndFindMedian(String path, int medianIndex1,
-      int medianIndex2, Configuration conf) throws IOException {
-    FileSystem fs = FileSystem.get(conf);
-    Path file = new Path(path, "part-r-00000");
-
-    if (!fs.exists(file))
-      throw new IOException("Output not found!");
-
-    BufferedReader br = null;
-
-    try {
-      br = new BufferedReader(new InputStreamReader(fs.open(file)));
-      int num = 0;
-
-      String line;
-      while ((line = br.readLine()) != null) {
-        StringTokenizer st = new StringTokenizer(line);
-
-        // grab length
-        String currLen = st.nextToken();
-
-        // grab count
-        String lengthFreq = st.nextToken();
-
-        int prevNum = num;
-        num += Integer.parseInt(lengthFreq);
-
-        if (medianIndex2 >= prevNum && medianIndex1 <= num) {
-          System.out.println("The median is: " + currLen);
-          br.close();
-          return Double.parseDouble(currLen);
-        } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
-          String nextCurrLen = st.nextToken();
-          double theMedian = (Integer.parseInt(currLen) + Integer
-              .parseInt(nextCurrLen)) / 2.0;
-          System.out.println("The median is: " + theMedian);
-          br.close();
-          return theMedian;
-        }
-      }
-    } finally {
-      br.close();
-    }
-    // error, no median found
-    return -1;
-  }
-
-  public static void main(String[] args) throws Exception {
-    ToolRunner.run(new Configuration(), new WordMedian(), args);
-  }
-
-  @Override
-  public int run(String[] args) throws Exception {
-    if (args.length != 2) {
-      System.err.println("Usage: wordmedian <in> <out>");
-      return 0;
-    }
-
-    setConf(new Configuration());
-    Configuration conf = getConf();
-
-    @SuppressWarnings("deprecation")
-    Job job = new Job(conf, "word median");
-    job.setJarByClass(WordMedian.class);
-    job.setMapperClass(WordMedianMapper.class);
-    job.setCombinerClass(WordMedianReducer.class);
-    job.setReducerClass(WordMedianReducer.class);
-    job.setOutputKeyClass(IntWritable.class);
-    job.setOutputValueClass(IntWritable.class);
-    FileInputFormat.addInputPath(job, new Path(args[0]));
-    FileOutputFormat.setOutputPath(job, new Path(args[1]));
-    boolean result = job.waitForCompletion(true);
-
-    // Wait for JOB 1 -- get middle value to check for Median
-
-    long totalWords = job.getCounters()
-        .getGroup(TaskCounter.class.getCanonicalName())
-        .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
-    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
-    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
-
-    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
-
-    return (result ? 0 : 1);
-  }
-
-  public double getMedian() {
-    return median;
-  }
-}
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordMedian extends Configured implements Tool {
+
+  private double median = 0;
+  private final static IntWritable ONE = new IntWritable(1);
+
+  /**
+   * Maps words from line of text into a key-value pair; the length of the word
+   * as the key, and 1 as the value.
+   */
+  public static class WordMedianMapper extends
+      Mapper<Object, Text, IntWritable, IntWritable> {
+
+    private IntWritable length = new IntWritable();
+
+    /**
+     * Emits a key-value pair for counting the word. Outputs are (IntWritable,
+     * IntWritable).
+     * 
+     * @param value
+     *          This will be a line of text coming in from our input file.
+     */
+    public void map(Object key, Text value, Context context)
+        throws IOException, InterruptedException {
+      StringTokenizer itr = new StringTokenizer(value.toString());
+      while (itr.hasMoreTokens()) {
+        String string = itr.nextToken();
+        length.set(string.length());
+        context.write(length, ONE);
+      }
+    }
+  }
+
+  /**
+   * Performs integer summation of all the values for each key.
+   */
+  public static class WordMedianReducer extends
+      Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
+
+    private IntWritable val = new IntWritable();
+
+    /**
+     * Sums all the individual values within the iterator and writes them to the
+     * same key.
+     * 
+     * @param key
+     *          This will be a length of a word that was read.
+     * @param values
+     *          This will be an iterator of all the values associated with that
+     *          key.
+     */
+    public void reduce(IntWritable key, Iterable<IntWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      val.set(sum);
+      context.write(key, val);
+    }
+  }
+
+  /**
+   * This is a standard program to read and find a median value based on a file
+   * of word counts such as: 1 456, 2 132, 3 56... Where the first values are
+   * the word lengths and the following values are the number of times that
+   * words of that length appear.
+   * 
+   * @param path
+   *          The path to read the HDFS file from (part-r-00000...00001...etc).
+   * @param medianIndex1
+   *          The first length value to look for.
+   * @param medianIndex2
+   *          The second length value to look for (will be the same as the first
+   *          if there are an even number of words total).
+   * @throws IOException
+   *           If file cannot be found, we throw an exception.
+   * */
+  private double readAndFindMedian(String path, int medianIndex1,
+      int medianIndex2, Configuration conf) throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    Path file = new Path(path, "part-r-00000");
+
+    if (!fs.exists(file))
+      throw new IOException("Output not found!");
+
+    BufferedReader br = null;
+
+    try {
+      br = new BufferedReader(new InputStreamReader(fs.open(file)));
+      int num = 0;
+
+      String line;
+      while ((line = br.readLine()) != null) {
+        StringTokenizer st = new StringTokenizer(line);
+
+        // grab length
+        String currLen = st.nextToken();
+
+        // grab count
+        String lengthFreq = st.nextToken();
+
+        int prevNum = num;
+        num += Integer.parseInt(lengthFreq);
+
+        if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+          System.out.println("The median is: " + currLen);
+          br.close();
+          return Double.parseDouble(currLen);
+        } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+          String nextCurrLen = st.nextToken();
+          double theMedian = (Integer.parseInt(currLen) + Integer
+              .parseInt(nextCurrLen)) / 2.0;
+          System.out.println("The median is: " + theMedian);
+          br.close();
+          return theMedian;
+        }
+      }
+    } finally {
+      br.close();
+    }
+    // error, no median found
+    return -1;
+  }
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new Configuration(), new WordMedian(), args);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: wordmedian <in> <out>");
+      return 0;
+    }
+
+    setConf(new Configuration());
+    Configuration conf = getConf();
+
+    @SuppressWarnings("deprecation")
+    Job job = new Job(conf, "word median");
+    job.setJarByClass(WordMedian.class);
+    job.setMapperClass(WordMedianMapper.class);
+    job.setCombinerClass(WordMedianReducer.class);
+    job.setReducerClass(WordMedianReducer.class);
+    job.setOutputKeyClass(IntWritable.class);
+    job.setOutputValueClass(IntWritable.class);
+    FileInputFormat.addInputPath(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+    boolean result = job.waitForCompletion(true);
+
+    // Wait for JOB 1 -- get middle value to check for Median
+
+    long totalWords = job.getCounters()
+        .getGroup(TaskCounter.class.getCanonicalName())
+        .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
+    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
+    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
+
+    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
+
+    return (result ? 0 : 1);
+  }
+
+  public double getMedian() {
+    return median;
+  }
+}

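WordMedian derives two one-based ranks from the MAP_OUTPUT_RECORDS counter, and readAndFindMedian() then walks the cumulative word-length histogram until a rank lands in a bucket. The rank arithmetic, checked standalone with hypothetical totals:

    public class MedianIndexCheck {
      public static void main(String[] args) {
        for (long totalWords : new long[] { 8, 9 }) {   // hypothetical word counts
          int medianIndex1 = (int) Math.ceil(totalWords / 2.0);
          int medianIndex2 = (int) Math.floor(totalWords / 2.0);
          // Even total: both ranks coincide (8 -> 4, 4).
          // Odd total: the ranks straddle the middle (9 -> 5, 4).
          System.out.println(totalWords + " -> " + medianIndex1 + "," + medianIndex2);
        }
      }
    }
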
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java Tue Oct 16 00:02:55 2012
@@ -1,210 +1,210 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordStandardDeviation extends Configured implements Tool {
-
-  private double stddev = 0;
-
-  private final static Text LENGTH = new Text("length");
-  private final static Text SQUARE = new Text("square");
-  private final static Text COUNT = new Text("count");
-  private final static LongWritable ONE = new LongWritable(1);
-
-  /**
-   * Maps words from line of text into 3 key-value pairs; one key-value pair for
-   * counting the word, one for counting its length, and one for counting the
-   * square of its length.
-   */
-  public static class WordStandardDeviationMapper extends
-      Mapper<Object, Text, Text, LongWritable> {
-
-    private LongWritable wordLen = new LongWritable();
-    private LongWritable wordLenSq = new LongWritable();
-
-    /**
-     * Emits 3 key-value pairs for counting the word, its length, and the
-     * squares of its length. Outputs are (Text, LongWritable).
-     * 
-     * @param value
-     *          This will be a line of text coming in from our input file.
-     */
-    public void map(Object key, Text value, Context context)
-        throws IOException, InterruptedException {
-      StringTokenizer itr = new StringTokenizer(value.toString());
-      while (itr.hasMoreTokens()) {
-        String string = itr.nextToken();
-
-        this.wordLen.set(string.length());
-
-        // the square of an integer is an integer...
-        this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
-
-        context.write(LENGTH, this.wordLen);
-        context.write(SQUARE, this.wordLenSq);
-        context.write(COUNT, ONE);
-      }
-    }
-  }
-
-  /**
-   * Performs integer summation of all the values for each key.
-   */
-  public static class WordStandardDeviationReducer extends
-      Reducer<Text, LongWritable, Text, LongWritable> {
-
-    private LongWritable val = new LongWritable();
-
-    /**
-     * Sums all the individual values within the iterator and writes them to the
-     * same key.
-     * 
-     * @param key
-     *          This will be one of 2 constants: LENGTH_STR, COUNT_STR, or
-     *          SQUARE_STR.
-     * @param values
-     *          This will be an iterator of all the values associated with that
-     *          key.
-     */
-    public void reduce(Text key, Iterable<LongWritable> values, Context context)
-        throws IOException, InterruptedException {
-
-      int sum = 0;
-      for (LongWritable value : values) {
-        sum += value.get();
-      }
-      val.set(sum);
-      context.write(key, val);
-    }
-  }
-
-  /**
-   * Reads the output file and parses the summation of lengths, the word count,
-   * and the lengths squared, to perform a quick calculation of the standard
-   * deviation.
-   * 
-   * @param path
-   *          The path to find the output file in. Set in main to the output
-   *          directory.
-   * @throws IOException
-   *           If it cannot access the output directory, we throw an exception.
-   */
-  private double readAndCalcStdDev(Path path, Configuration conf)
-      throws IOException {
-    FileSystem fs = FileSystem.get(conf);
-    Path file = new Path(path, "part-r-00000");
-
-    if (!fs.exists(file))
-      throw new IOException("Output not found!");
-
-    double stddev = 0;
-    BufferedReader br = null;
-    try {
-      br = new BufferedReader(new InputStreamReader(fs.open(file)));
-      long count = 0;
-      long length = 0;
-      long square = 0;
-      String line;
-      while ((line = br.readLine()) != null) {
-        StringTokenizer st = new StringTokenizer(line);
-
-        // grab type
-        String type = st.nextToken();
-
-        // differentiate
-        if (type.equals(COUNT.toString())) {
-          String countLit = st.nextToken();
-          count = Long.parseLong(countLit);
-        } else if (type.equals(LENGTH.toString())) {
-          String lengthLit = st.nextToken();
-          length = Long.parseLong(lengthLit);
-        } else if (type.equals(SQUARE.toString())) {
-          String squareLit = st.nextToken();
-          square = Long.parseLong(squareLit);
-        }
-      }
-      // average = total sum / number of elements;
-      double mean = (((double) length) / ((double) count));
-      // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
-      mean = Math.pow(mean, 2.0);
-      double term = (((double) square / ((double) count)));
-      stddev = Math.sqrt((term - mean));
-      System.out.println("The standard deviation is: " + stddev);
-    } finally {
-      br.close();
-    }
-    return stddev;
-  }
-
-  public static void main(String[] args) throws Exception {
-    ToolRunner.run(new Configuration(), new WordStandardDeviation(),
-        args);
-  }
-
-  @Override
-  public int run(String[] args) throws Exception {
-    if (args.length != 2) {
-      System.err.println("Usage: wordstddev <in> <out>");
-      return 0;
-    }
-
-    Configuration conf = getConf();
-
-    @SuppressWarnings("deprecation")
-    Job job = new Job(conf, "word stddev");
-    job.setJarByClass(WordStandardDeviation.class);
-    job.setMapperClass(WordStandardDeviationMapper.class);
-    job.setCombinerClass(WordStandardDeviationReducer.class);
-    job.setReducerClass(WordStandardDeviationReducer.class);
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(LongWritable.class);
-    FileInputFormat.addInputPath(job, new Path(args[0]));
-    Path outputpath = new Path(args[1]);
-    FileOutputFormat.setOutputPath(job, outputpath);
-    boolean result = job.waitForCompletion(true);
-
-    // read output and calculate standard deviation
-    stddev = readAndCalcStdDev(outputpath, conf);
-
-    return (result ? 0 : 1);
-  }
-
-  public double getStandardDeviation() {
-    return stddev;
-  }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordStandardDeviation extends Configured implements Tool {
+
+  private double stddev = 0;
+
+  private final static Text LENGTH = new Text("length");
+  private final static Text SQUARE = new Text("square");
+  private final static Text COUNT = new Text("count");
+  private final static LongWritable ONE = new LongWritable(1);
+
+  /**
+   * Maps words from line of text into 3 key-value pairs; one key-value pair for
+   * counting the word, one for counting its length, and one for counting the
+   * square of its length.
+   */
+  public static class WordStandardDeviationMapper extends
+      Mapper<Object, Text, Text, LongWritable> {
+
+    private LongWritable wordLen = new LongWritable();
+    private LongWritable wordLenSq = new LongWritable();
+
+    /**
+     * Emits 3 key-value pairs for counting the word, its length, and the
+     * squares of its length. Outputs are (Text, LongWritable).
+     * 
+     * @param value
+     *          This will be a line of text coming in from our input file.
+     */
+    public void map(Object key, Text value, Context context)
+        throws IOException, InterruptedException {
+      StringTokenizer itr = new StringTokenizer(value.toString());
+      while (itr.hasMoreTokens()) {
+        String string = itr.nextToken();
+
+        this.wordLen.set(string.length());
+
+        // the square of an integer is an integer...
+        this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
+
+        context.write(LENGTH, this.wordLen);
+        context.write(SQUARE, this.wordLenSq);
+        context.write(COUNT, ONE);
+      }
+    }
+  }
+
+  /**
+   * Performs integer summation of all the values for each key.
+   */
+  public static class WordStandardDeviationReducer extends
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
+    private LongWritable val = new LongWritable();
+
+    /**
+     * Sums all the individual values within the iterator and writes the
+     * total under the same key.
+     * 
+     * @param key
+     *          This will be one of 3 constants: LENGTH, COUNT, or
+     *          SQUARE.
+     * @param values
+     *          This will be an iterator of all the values associated with that
+     *          key.
+     */
+    public void reduce(Text key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
+
+      long sum = 0;
+      for (LongWritable value : values) {
+        sum += value.get();
+      }
+      val.set(sum);
+      context.write(key, val);
+    }
+  }
+
+  /**
+   * Reads the output file and parses the summation of lengths, the word count,
+   * and the lengths squared, to perform a quick calculation of the standard
+   * deviation.
+   * 
+   * @param path
+   *          The path to find the output file in. Set in main to the output
+   *          directory.
+   * @throws IOException
+   *           If the output directory or its file cannot be accessed.
+   */
+  private double readAndCalcStdDev(Path path, Configuration conf)
+      throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    Path file = new Path(path, "part-r-00000");
+
+    if (!fs.exists(file))
+      throw new IOException("Output not found!");
+
+    double stddev = 0;
+    BufferedReader br = null;
+    try {
+      br = new BufferedReader(new InputStreamReader(fs.open(file)));
+      long count = 0;
+      long length = 0;
+      long square = 0;
+      String line;
+      while ((line = br.readLine()) != null) {
+        StringTokenizer st = new StringTokenizer(line);
+
+        // grab type
+        String type = st.nextToken();
+
+        // differentiate
+        if (type.equals(COUNT.toString())) {
+          String countLit = st.nextToken();
+          count = Long.parseLong(countLit);
+        } else if (type.equals(LENGTH.toString())) {
+          String lengthLit = st.nextToken();
+          length = Long.parseLong(lengthLit);
+        } else if (type.equals(SQUARE.toString())) {
+          String squareLit = st.nextToken();
+          square = Long.parseLong(squareLit);
+        }
+      }
+      // average = total sum / number of elements;
+      double mean = (((double) length) / ((double) count));
+      // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
+      mean = Math.pow(mean, 2.0);
+      double term = (((double) square / ((double) count)));
+      stddev = Math.sqrt((term - mean));
+      System.out.println("The standard deviation is: " + stddev);
+    } finally {
+      if (br != null) br.close();
+    }
+    return stddev;
+  }
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new Configuration(), new WordStandardDeviation(),
+        args);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: wordstddev <in> <out>");
+      return 2;
+    }
+
+    Configuration conf = getConf();
+
+    // Job.getInstance avoids the deprecated Job(Configuration, String) ctor
+    Job job = Job.getInstance(conf, "word stddev");
+    job.setJarByClass(WordStandardDeviation.class);
+    job.setMapperClass(WordStandardDeviationMapper.class);
+    job.setCombinerClass(WordStandardDeviationReducer.class);
+    job.setReducerClass(WordStandardDeviationReducer.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(LongWritable.class);
+    FileInputFormat.addInputPath(job, new Path(args[0]));
+    Path outputpath = new Path(args[1]);
+    FileOutputFormat.setOutputPath(job, outputpath);
+    boolean result = job.waitForCompletion(true);
+
+    // read output and calculate standard deviation
+    stddev = readAndCalcStdDev(outputpath, conf);
+
+    return (result ? 0 : 1);
+  }
+
+  public double getStandardDeviation() {
+    return stddev;
+  }
 }
\ No newline at end of file

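For anyone tracing readAndCalcStdDev above: the job only ever materializes three running sums (the COUNT, LENGTH, and SQUARE keys), and the driver recovers the population standard deviation from them via the identity stddev = sqrt(sum(len^2)/n - (sum(len)/n)^2). A minimal standalone sketch of that identity, with a made-up word list and a hypothetical class name:

  public class StdDevIdentityCheck {
    public static void main(String[] args) {
      // hypothetical input; any word list works
      String[] words = { "apache", "hadoop", "map", "reduce" };
      long count = 0, length = 0, square = 0;
      for (String w : words) {
        count++;                                   // COUNT partial sum
        length += w.length();                      // LENGTH partial sum
        square += (long) w.length() * w.length();  // SQUARE partial sum
      }
      double mean = (double) length / count;
      double stddev = Math.sqrt((double) square / count - mean * mean);
      System.out.println("The standard deviation is: " + stddev);
    }
  }

The same three sums are what make the combiner safe here: partial sums of longs merge associatively, which is why the reducer doubles as the combiner.
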
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java Tue Oct 16 00:02:55 2012
@@ -165,16 +165,30 @@ public class DistributedPentomino extend
   }
 
   public int run(String[] args) throws Exception {
+    Configuration conf = getConf();
     if (args.length == 0) {
-      System.out.println("pentomino <output>");
+      System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
       ToolRunner.printGenericCommandUsage(System.out);
       return 2;
     }
-
-    Configuration conf = getConf();
-    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
-    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
-    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
+    // check for passed parameters, otherwise use defaults
+    int width = PENT_WIDTH;
+    int height = PENT_HEIGHT;
+    int depth = PENT_DEPTH;
+    for (int i = 0; i < args.length; i++) {
+      if (args[i].equalsIgnoreCase("-depth")) {
+        depth = Integer.parseInt(args[++i].trim());
+      } else if (args[i].equalsIgnoreCase("-height")) {
+        height = Integer.parseInt(args[++i].trim());
+      } else if (args[i].equalsIgnoreCase("-width")) {
+        width = Integer.parseInt(args[++i].trim());
+      }
+    }
+    // now set the values in conf for the M/R tasks to read; this
+    // guarantees the values are always set, preventing MAPREDUCE-4678
+    conf.setInt(Pentomino.WIDTH, width);
+    conf.setInt(Pentomino.HEIGHT, height);
+    conf.setInt(Pentomino.DEPTH, depth);
     Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, 
       OneSidedPentomino.class, Pentomino.class);
     int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);

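A note on the flag handling above: each option consumes the argument that follows it, so the parser has to advance before parsing (args[++i]); a post-increment (args[i++]) would feed the flag name itself to Integer.parseInt and throw NumberFormatException. A standalone sketch of the same pattern, with hypothetical defaults and class name:

  public class FlagParseSketch {
    public static void main(String[] args) {
      int depth = 5, height = 10, width = 6;  // assumed defaults, illustration only
      for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
          // ++i consumes the value following the flag; the loop's i++
          // then steps past it to the next flag
          depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
          height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
          width = Integer.parseInt(args[++i].trim());
        }
      }
      System.out.println("depth=" + depth + ", height=" + height + ", width=" + width);
    }
  }
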
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java Tue Oct 16 00:02:55 2012
@@ -1,272 +1,272 @@
-package org.apache.hadoop.examples;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-import java.util.TreeMap;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestWordStats {
-
-  private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
-  private final static String MEAN_OUTPUT = "build/data/mean_output";
-  private final static String MEDIAN_OUTPUT = "build/data/median_output";
-  private final static String STDDEV_OUTPUT = "build/data/stddev_output";
-
-  /**
-   * Modified internal test class that is designed to read all the files in the
-   * input directory, and find the standard deviation between all of the word
-   * lengths.
-   */
-  public static class WordStdDevReader {
-    private long wordsRead = 0;
-    private long wordLengthsRead = 0;
-    private long wordLengthsReadSquared = 0;
-
-    public WordStdDevReader() {
-    }
-
-    public double read(String path) throws IOException {
-      FileSystem fs = FileSystem.get(new Configuration());
-      FileStatus[] files = fs.listStatus(new Path(path));
-
-      for (FileStatus fileStat : files) {
-        if (!fileStat.isFile())
-          continue;
-
-        BufferedReader br = null;
-
-        try {
-          br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
-          String line;
-          while ((line = br.readLine()) != null) {
-            StringTokenizer st = new StringTokenizer(line);
-            String word;
-            while (st.hasMoreTokens()) {
-              word = st.nextToken();
-              this.wordsRead++;
-              this.wordLengthsRead += word.length();
-              this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
-            }
-          }
-
-        } catch (IOException e) {
-          System.out.println("Output could not be read!");
-          throw e;
-        } finally {
-          br.close();
-        }
-      }
-
-      double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
-      mean = Math.pow(mean, 2.0);
-      double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
-      double stddev = Math.sqrt((term - mean));
-      return stddev;
-    }
-
-  }
-
-  /**
-   * Modified internal test class that is designed to read all the files in the
-   * input directory, and find the median length of all the words.
-   */
-  public static class WordMedianReader {
-    private long wordsRead = 0;
-    private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
-
-    public WordMedianReader() {
-    }
-
-    public double read(String path) throws IOException {
-      FileSystem fs = FileSystem.get(new Configuration());
-      FileStatus[] files = fs.listStatus(new Path(path));
-
-      int num = 0;
-
-      for (FileStatus fileStat : files) {
-        if (!fileStat.isFile())
-          continue;
-
-        BufferedReader br = null;
-
-        try {
-          br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
-          String line;
-          while ((line = br.readLine()) != null) {
-            StringTokenizer st = new StringTokenizer(line);
-            String word;
-            while (st.hasMoreTokens()) {
-              word = st.nextToken();
-              this.wordsRead++;
-              if (this.map.get(word.length()) == null) {
-                this.map.put(word.length(), 1);
-              } else {
-                int count = this.map.get(word.length());
-                this.map.put(word.length(), count + 1);
-              }
-            }
-          }
-        } catch (IOException e) {
-          System.out.println("Output could not be read!");
-          throw e;
-        } finally {
-          br.close();
-        }
-      }
-
-      int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
-      int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
-
-      for (Integer key : this.map.navigableKeySet()) {
-        int prevNum = num;
-        num += this.map.get(key);
-
-        if (medianIndex2 >= prevNum && medianIndex1 <= num) {
-          return key;
-        } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
-          Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
-          double median = (key + nextCurrLen) / 2.0;
-          return median;
-        }
-      }
-      return -1;
-    }
-
-  }
-
-  /**
-   * Modified internal test class that is designed to read all the files in the
-   * input directory, and find the mean length of all the words.
-   */
-  public static class WordMeanReader {
-    private long wordsRead = 0;
-    private long wordLengthsRead = 0;
-
-    public WordMeanReader() {
-    }
-
-    public double read(String path) throws IOException {
-      FileSystem fs = FileSystem.get(new Configuration());
-      FileStatus[] files = fs.listStatus(new Path(path));
-
-      for (FileStatus fileStat : files) {
-        if (!fileStat.isFile())
-          continue;
-
-        BufferedReader br = null;
-
-        try {
-          br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
-          String line;
-          while ((line = br.readLine()) != null) {
-            StringTokenizer st = new StringTokenizer(line);
-            String word;
-            while (st.hasMoreTokens()) {
-              word = st.nextToken();
-              this.wordsRead++;
-              this.wordLengthsRead += word.length();
-            }
-          }
-        } catch (IOException e) {
-          System.out.println("Output could not be read!");
-          throw e;
-        } finally {
-          br.close();
-        }
-      }
-
-      double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
-      return mean;
-    }
-
-  }
-
-  /**
-   * Internal class designed to delete the output directory. Meant solely for
-   * use before and after the test is run; this is so next iterations of the
-   * test do not encounter a "file already exists" error.
-   * 
-   * @param dir
-   *          The directory to delete.
-   * @return Returns whether the deletion was successful or not.
-   */
-  public static boolean deleteDir(File dir) {
-    if (dir.isDirectory()) {
-      String[] children = dir.list();
-      for (int i = 0; i < children.length; i++) {
-        boolean success = deleteDir(new File(dir, children[i]));
-        if (!success) {
-          System.out.println("Could not delete directory after test!");
-          return false;
-        }
-      }
-    }
-
-    // The directory is now empty so delete it
-    return dir.delete();
-  }
-
-  @Before public void setup() throws Exception {
-    deleteDir(new File(MEAN_OUTPUT));
-    deleteDir(new File(MEDIAN_OUTPUT));
-    deleteDir(new File(STDDEV_OUTPUT));
-  }
-
-  @Test public void testGetTheMean() throws Exception {
-    String args[] = new String[2];
-    args[0] = INPUT;
-    args[1] = MEAN_OUTPUT;
-
-    WordMean wm = new WordMean();
-    ToolRunner.run(new Configuration(), wm, args);
-    double mean = wm.getMean();
-
-    // outputs MUST match
-    WordMeanReader wr = new WordMeanReader();
-    assertEquals(mean, wr.read(INPUT), 0.0);
-  }
-
-  @Test public void testGetTheMedian() throws Exception {
-    String args[] = new String[2];
-    args[0] = INPUT;
-    args[1] = MEDIAN_OUTPUT;
-
-    WordMedian wm = new WordMedian();
-    ToolRunner.run(new Configuration(), wm, args);
-    double median = wm.getMedian();
-
-    // outputs MUST match
-    WordMedianReader wr = new WordMedianReader();
-    assertEquals(median, wr.read(INPUT), 0.0);
-  }
-
-  @Test public void testGetTheStandardDeviation() throws Exception {
-    String args[] = new String[2];
-    args[0] = INPUT;
-    args[1] = STDDEV_OUTPUT;
-
-    WordStandardDeviation wsd = new WordStandardDeviation();
-    ToolRunner.run(new Configuration(), wsd, args);
-    double stddev = wsd.getStandardDeviation();
-
-    // outputs MUST match
-    WordStdDevReader wr = new WordStdDevReader();
-    assertEquals(stddev, wr.read(INPUT), 0.0);
-  }
-
-}
+package org.apache.hadoop.examples;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestWordStats {
+
+  private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
+  private final static String MEAN_OUTPUT = "build/data/mean_output";
+  private final static String MEDIAN_OUTPUT = "build/data/median_output";
+  private final static String STDDEV_OUTPUT = "build/data/stddev_output";
+
+  /**
+   * Modified internal test class that is designed to read all the files in the
+   * input directory, and find the standard deviation between all of the word
+   * lengths.
+   */
+  public static class WordStdDevReader {
+    private long wordsRead = 0;
+    private long wordLengthsRead = 0;
+    private long wordLengthsReadSquared = 0;
+
+    public WordStdDevReader() {
+    }
+
+    public double read(String path) throws IOException {
+      FileSystem fs = FileSystem.get(new Configuration());
+      FileStatus[] files = fs.listStatus(new Path(path));
+
+      for (FileStatus fileStat : files) {
+        if (!fileStat.isFile())
+          continue;
+
+        BufferedReader br = null;
+
+        try {
+          br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+          String line;
+          while ((line = br.readLine()) != null) {
+            StringTokenizer st = new StringTokenizer(line);
+            String word;
+            while (st.hasMoreTokens()) {
+              word = st.nextToken();
+              this.wordsRead++;
+              this.wordLengthsRead += word.length();
+              this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
+            }
+          }
+
+        } catch (IOException e) {
+          System.out.println("Output could not be read!");
+          throw e;
+        } finally {
+          if (br != null) br.close();
+        }
+      }
+
+      double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+      mean = Math.pow(mean, 2.0);
+      double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
+      double stddev = Math.sqrt((term - mean));
+      return stddev;
+    }
+
+  }
+
+  /**
+   * Modified internal test class that is designed to read all the files in the
+   * input directory, and find the median length of all the words.
+   */
+  public static class WordMedianReader {
+    private long wordsRead = 0;
+    private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
+
+    public WordMedianReader() {
+    }
+
+    public double read(String path) throws IOException {
+      FileSystem fs = FileSystem.get(new Configuration());
+      FileStatus[] files = fs.listStatus(new Path(path));
+
+      int num = 0;
+
+      for (FileStatus fileStat : files) {
+        if (!fileStat.isFile())
+          continue;
+
+        BufferedReader br = null;
+
+        try {
+          br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+          String line;
+          while ((line = br.readLine()) != null) {
+            StringTokenizer st = new StringTokenizer(line);
+            String word;
+            while (st.hasMoreTokens()) {
+              word = st.nextToken();
+              this.wordsRead++;
+              if (this.map.get(word.length()) == null) {
+                this.map.put(word.length(), 1);
+              } else {
+                int count = this.map.get(word.length());
+                this.map.put(word.length(), count + 1);
+              }
+            }
+          }
+        } catch (IOException e) {
+          System.out.println("Output could not be read!");
+          throw e;
+        } finally {
+          if (br != null) br.close();
+        }
+      }
+
+      int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
+      int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
+
+      for (Integer key : this.map.navigableKeySet()) {
+        int prevNum = num;
+        num += this.map.get(key);
+
+        if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+          return key;
+        } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+          Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
+          double median = (key + nextCurrLen) / 2.0;
+          return median;
+        }
+      }
+      return -1;
+    }
+
+  }
+
+  /**
+   * Modified internal test class that is designed to read all the files in the
+   * input directory, and find the mean length of all the words.
+   */
+  public static class WordMeanReader {
+    private long wordsRead = 0;
+    private long wordLengthsRead = 0;
+
+    public WordMeanReader() {
+    }
+
+    public double read(String path) throws IOException {
+      FileSystem fs = FileSystem.get(new Configuration());
+      FileStatus[] files = fs.listStatus(new Path(path));
+
+      for (FileStatus fileStat : files) {
+        if (!fileStat.isFile())
+          continue;
+
+        BufferedReader br = null;
+
+        try {
+          br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+          String line;
+          while ((line = br.readLine()) != null) {
+            StringTokenizer st = new StringTokenizer(line);
+            String word;
+            while (st.hasMoreTokens()) {
+              word = st.nextToken();
+              this.wordsRead++;
+              this.wordLengthsRead += word.length();
+            }
+          }
+        } catch (IOException e) {
+          System.out.println("Output could not be read!");
+          throw e;
+        } finally {
+          if (br != null) br.close();
+        }
+      }
+
+      double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+      return mean;
+    }
+
+  }
+
+  /**
+   * Internal class designed to delete the output directory. Meant solely for
+   * use before and after the test is run; this is so next iterations of the
+   * test do not encounter a "file already exists" error.
+   * 
+   * @param dir
+   *          The directory to delete.
+   * @return Returns whether the deletion was successful or not.
+   */
+  public static boolean deleteDir(File dir) {
+    if (dir.isDirectory()) {
+      String[] children = dir.list();
+      for (int i = 0; i < children.length; i++) {
+        boolean success = deleteDir(new File(dir, children[i]));
+        if (!success) {
+          System.out.println("Could not delete directory after test!");
+          return false;
+        }
+      }
+    }
+
+    // The directory is now empty so delete it
+    return dir.delete();
+  }
+
+  @Before public void setup() throws Exception {
+    deleteDir(new File(MEAN_OUTPUT));
+    deleteDir(new File(MEDIAN_OUTPUT));
+    deleteDir(new File(STDDEV_OUTPUT));
+  }
+
+  @Test public void testGetTheMean() throws Exception {
+    String args[] = new String[2];
+    args[0] = INPUT;
+    args[1] = MEAN_OUTPUT;
+
+    WordMean wm = new WordMean();
+    ToolRunner.run(new Configuration(), wm, args);
+    double mean = wm.getMean();
+
+    // outputs MUST match
+    WordMeanReader wr = new WordMeanReader();
+    assertEquals(mean, wr.read(INPUT), 0.0);
+  }
+
+  @Test public void testGetTheMedian() throws Exception {
+    String args[] = new String[2];
+    args[0] = INPUT;
+    args[1] = MEDIAN_OUTPUT;
+
+    WordMedian wm = new WordMedian();
+    ToolRunner.run(new Configuration(), wm, args);
+    double median = wm.getMedian();
+
+    // outputs MUST match
+    WordMedianReader wr = new WordMedianReader();
+    assertEquals(median, wr.read(INPUT), 0.0);
+  }
+
+  @Test public void testGetTheStandardDeviation() throws Exception {
+    String args[] = new String[2];
+    args[0] = INPUT;
+    args[1] = STDDEV_OUTPUT;
+
+    WordStandardDeviation wsd = new WordStandardDeviation();
+    ToolRunner.run(new Configuration(), wsd, args);
+    double stddev = wsd.getStandardDeviation();
+
+    // outputs MUST match
+    WordStdDevReader wr = new WordStdDevReader();
+    assertEquals(stddev, wr.read(INPUT), 0.0);
+  }
+
+}

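The readers above recompute each statistic in a single local pass so the MapReduce output can be checked exactly. The subtle case is the median: WordMedianReader walks a TreeMap histogram of word lengths until the cumulative count covers the middle of the stream. A standalone sketch of that grouped-median walk, using the equivalent two-middle-positions formulation rather than the ceil/floor indices in the test (names and input hypothetical):

  import java.util.TreeMap;

  public class GroupedMedianSketch {
    public static void main(String[] args) {
      // hypothetical words; sorted lengths are 1, 2, 2, 3, so the median is 2.0
      String[] words = { "a", "bb", "bb", "ccc" };
      TreeMap<Integer, Integer> lengthCounts = new TreeMap<Integer, Integer>();
      long wordsRead = 0;
      for (String w : words) {
        Integer c = lengthCounts.get(w.length());
        lengthCounts.put(w.length(), c == null ? 1 : c + 1);
        wordsRead++;
      }
      long p1 = (wordsRead + 1) / 2;  // lower middle position, 1-indexed
      long p2 = wordsRead / 2 + 1;    // upper middle position; equals p1 when odd
      long seen = 0;
      int lower = -1, upper = -1;
      for (int key : lengthCounts.navigableKeySet()) {
        seen += lengthCounts.get(key);
        if (lower < 0 && seen >= p1) lower = key;
        if (upper < 0 && seen >= p2) { upper = key; break; }
      }
      System.out.println("median word length: " + (lower + upper) / 2.0);
    }
  }
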
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml Tue Oct 16 00:02:55 2012
@@ -219,6 +219,18 @@
           </includes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <properties>
+            <property>
+              <name>listener</name>
+              <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+            </property>
+          </properties>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

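The listener property above is how surefire attaches a JUnit RunListener to every forked test JVM; org.apache.hadoop.test.TimedOutTestsListener (from hadoop-common's test artifact) reacts to timeout failures by dumping thread stacks, so hung tests leave something to debug with. A minimal sketch of a listener in that spirit, assuming JUnit 4 and a hypothetical class name:

  import java.util.Map;

  import org.junit.runner.notification.Failure;
  import org.junit.runner.notification.RunListener;

  public class StackDumpListener extends RunListener {
    @Override
    public void testFailure(Failure failure) {
      String msg = failure.getMessage();
      if (msg != null && msg.contains("test timed out")) {
        // print every live thread's stack to stderr on a timeout
        for (Map.Entry<Thread, StackTraceElement[]> e
            : Thread.getAllStackTraces().entrySet()) {
          System.err.println(e.getKey());
          for (StackTraceElement frame : e.getValue()) {
            System.err.println("    at " + frame);
          }
        }
      }
    }
  }
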
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/c++/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/c++:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/block_forensics/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/block_forensics:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/build-contrib.xml
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/build-contrib.xml:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/build.xml
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/build.xml:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/data_join/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/data_join:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/eclipse-plugin/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/eclipse-plugin:r1363593-1396941

Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/
------------------------------------------------------------------------------
  Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/index:r1363593-1396941

Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt Tue Oct 16 00:02:55 2012
@@ -1,10 +1,10 @@
-0 ins apache dot org
-1 ins apache
-2 ins apache
-3 ins apache
-4 ins apache
-5 ins apache
-6 ins apache
-7 ins apache
-8 ins apache
-9 ins apache
+0 ins apache dot org
+1 ins apache
+2 ins apache
+3 ins apache
+4 ins apache
+5 ins apache
+6 ins apache
+7 ins apache
+8 ins apache
+9 ins apache

Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt Tue Oct 16 00:02:55 2012
@@ -1,10 +1,10 @@
-0 del
-1 upd hadoop
-2 del
-3 upd hadoop
-4 del
-5 upd hadoop
-6 del
-7 upd hadoop
-8 del
-9 upd hadoop
+0 del
+1 upd hadoop
+2 del
+3 upd hadoop
+4 del
+5 upd hadoop
+6 del
+7 upd hadoop
+8 del
+9 upd hadoop